target_dir <- "../../ppcc_iflu_data/read_data"
# Reading the data back in
s_exp1 <- read.csv(file.path(target_dir, "s_exp1.csv"))
s_exp1_et <- read.csv(file.path(target_dir, "s_exp1_et.csv"))
digit_span <- read.csv(file.path(target_dir, "digit_span.csv"))
battery <- read.csv(file.path(target_dir, "battery.csv"))
lextale <- read.csv(file.path(target_dir, "lextale.csv"))
italian_lextale <- read.csv(file.path(target_dir, "italian_lextale.csv"))
autism <- read.csv(file.path(target_dir, "autism.csv"))
acoustic_data <- read.csv(file.path(target_dir, "acoustic_data.csv"))
removal <- read.csv(file.path(target_dir, "removal.csv"))

#battery tasks

conflicts_prefer(dplyr::filter)
## [conflicted] Will prefer dplyr::filter over any other package.
battery_tidy<-battery%>%
  select(participant.private.id,tree.node.key,task.name,screen.name,screen.number,display,reaction.time,response,sound1,sound2,base1,base2,same)%>%
  filter(display=="block" &screen.name == "Screen 3")%>%
  mutate(responses=case_when(response == "'z'"~1,
                             response == "'m'"~0),
         correct=if_else(responses==same,1,0),
         difference=abs(base1-base2))

battery_tidy%>%ggplot(aes(x=reaction.time))+
  geom_histogram()+
  facet_wrap(participant.private.id~.)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

mad_standard=3

battery_cleaned<-battery_tidy%>%
  #group_by(participant.private.id)%>%
  dplyr::filter(reaction.time>200)%>%
  mutate(log_rt=sqrt(reaction.time))%>%
  mutate(display_medians = median(log_rt),
         abs_distance = abs(display_medians-log_rt),
         mean_distance = mean(abs_distance),
         upper_mad = display_medians+(mean_distance*mad_standard),
         lower_mad = display_medians-(mean_distance*mad_standard))%>%
  group_by(participant.private.id)%>%
  mutate(participants_medians = median(log_rt))%>%
  dplyr::filter(log_rt<upper_mad &log_rt>lower_mad)


removal[nrow(removal) + 1, ] <- c("battery removal", 
                                  nrow(battery_tidy) - nrow(battery_cleaned))

battery_cleaned%>%ggplot(aes(x=log_rt))+
  geom_histogram()+
  geom_point(aes(x=upper_mad,y=0),color="red")+
  geom_point(aes(x=lower_mad,y=0),color="blue")+
  facet_wrap(participant.private.id~.)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

battery_cleaned%>%ggplot(aes(x=factor(participant.private.id),y=participants_medians))+
  geom_point()+
  geom_point(aes(y=upper_mad,x=factor(participant.private.id)),color="red")+
  geom_point(aes(y=lower_mad,x=factor(participant.private.id)),color="blue")

battery_cleaned_part_agg<-battery_cleaned%>%
  group_by(participant.private.id)%>%
summarise(score=mean(correct))%>%
  ungroup()%>%
    mutate(medians = median(score),
           abs_distance = abs(medians-score),
           mean_distance = mean(abs_distance),
           upper_mad = medians+(mean_distance*mad_standard),
           lower_mad = medians-(mean_distance*mad_standard))

battery_cleaned_part_agg%>%
  ggplot(aes(x=factor(participant.private.id),y=score))+
  geom_point()+
  geom_point(aes(x=factor(participant.private.id),y=upper_mad),color="red")+
  geom_point(aes(x=factor(participant.private.id),y=lower_mad),color="red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

battery_cleaned_part_agg_keep<-battery_cleaned_part_agg%>%
  filter(score<upper_mad & score>lower_mad)

battery_cleaned_item_agg<-battery_cleaned%>%
  group_by(difference,participant.private.id)%>%
summarise(score=mean(correct))%>%
  ungroup()%>%
    mutate(medians = median(score),
           abs_distance = abs(medians-score),
           mean_distance = mean(abs_distance),
           upper_mad = medians+(mean_distance*mad_standard),
           lower_mad = medians-(mean_distance*mad_standard))
## `summarise()` has grouped output by 'difference'. You can override using the
## `.groups` argument.
battery_cleaned_item_agg%>%
  ggplot(aes(x=factor(difference),y=score))+
  geom_boxplot()+
  geom_point()+
  geom_point(aes(x=factor(difference),y=upper_mad),color="red")+
  geom_point(aes(x=factor(difference),y=lower_mad),color="red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

battery_agg2<-battery_cleaned%>%
  group_by(participant.private.id, task.name) %>%
  summarize(score=mean(correct))
## `summarise()` has grouped output by 'participant.private.id'. You can override
## using the `.groups` argument.
battery_agg2%>%ggplot(aes(x=task.name,y=score,color=factor(task.name)))+
  geom_boxplot()+
  geom_jitter()+
  theme_minimal()

battery_agg<-battery_cleaned%>%
  mutate(
    hit = if_else(same == 1 & responses == 1L, 1L, 0L),
    f_a = if_else(same == 0 & responses == 1L, 1L, 0L),
    miss = if_else(same == 0 & responses == 0L, 1L, 0L),
    c_r = if_else(same == 1 & responses == 0L, 1L, 0L))%>%
  group_by(participant.private.id, task.name) %>%
  summarize(
    score=mean(correct)+.5,
    hit_count = sum(hit)+.5,
    f_a_count = sum(f_a)+.5,
    miss_count = sum(miss)+.5,
    c_r_count = sum(c_r)+.5,
    .groups = 'drop')%>%
  mutate(
    dprime = dprime(hit_count, 
                             f_a_count, 
                             hit_count + miss_count, 
                             f_a_count + c_r_count)$dprime,
    c = dprime(hit_count, 
                        f_a_count, 
                        hit_count + miss_count, 
                        f_a_count + c_r_count)$c)%>%
  select(participant.private.id, task.name, dprime, c)


battery_agg%>%ggplot(aes(x=factor(task.name),y=dprime,color=factor(task.name),alpha=.4))+
  geom_line(aes(group=factor(participant.private.id)),color="black")+
  geom_violin()+
  geom_boxplot(width=.5)+
  geom_point()+
  theme_minimal()

battery_agg%>%ggplot(aes(x=c,y=dprime,color=factor(task.name)))+
  geom_point()+
  theme_minimal()+
  facet_grid(.~factor(task.name))

battery_agg_wider<-battery_agg%>%
  pivot_wider(names_from = task.name,values_from = c(dprime:c))

battery_agg_wider_simp<-battery_agg_wider
ggpairs(battery_agg_wider_simp%>%select(!participant.private.id))

#digit span

digit_tidy<-digit_span%>%
  select(participant.private.id,tree.node.key,task.name,
         target,response,length,correct.,total.correct)%>%
  filter(correct.==1)%>%
  group_by(participant.private.id)%>%
  mutate(length=as.numeric(length))%>%
  summarize(max_working_memory=max(length),
         min_working_memory=min(length),
         mean_working_memory=mean(length),
         max_correct=max(total.correct))

digit_tidy%>%
  ggplot(aes(x=1,y=max_correct,color=factor(max_working_memory)))+
  geom_jitter()

digit_cleaned<-digit_tidy%>%
  ungroup()%>%
  mutate(display_medians = median(max_correct),
         abs_distance = abs(display_medians-mean_working_memory),
         mean_distance = mean(abs_distance),
         upper_mad = display_medians+(mean_distance*mad_standard),
         lower_mad = display_medians-(mean_distance*mad_standard))%>%
  filter(max_correct<upper_mad &max_correct>lower_mad)

digit_cleaned%>%
  ggplot(aes(x=mean_working_memory))+
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

digit_cleaned%>%
  ggplot(aes(x=1,y=mean_working_memory,color=factor(max_working_memory)))+
  geom_jitter()

removal[nrow(removal) + 1, ] <- c("digit_span_participant", 
                                  nrow(digit_tidy) - nrow(digit_cleaned))

digit_cleaned_simp<-digit_cleaned%>%select(participant.private.id,mean_working_memory)

#lextale English

lextale_tidy<-lextale%>%
  select(participant.private.id,screen.name,zone.type,reaction.time,correct)%>%
  filter(zone.type=="response_keyboard")%>%
  filter(reaction.time>250)

lextale_tidy%>%ggplot(aes(x=reaction.time))+geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

lextale_agg<-lextale_tidy%>%
  group_by(participant.private.id)%>%
  summarize(lextale_score=mean(correct))

lextale_agg_cleaned<-lextale_agg%>%
  ungroup()%>%
  mutate(medians = median(lextale_score),
         abs_distance = abs(medians-lextale_score),
         mean_distance = mean(abs_distance),
         upper_mad = medians+(mean_distance*mad_standard),
         lower_mad = medians-(mean_distance*mad_standard))%>%
  filter(lextale_score<upper_mad &lextale_score>lower_mad)


lextale_agg_cleaned%>%ggplot(aes(x=lextale_score))+
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

removal[nrow(removal) + 1, ] <- c("lextale_agg_participant", 
                                  nrow(lextale_agg) - nrow(lextale_agg_cleaned))

lextale_agg_cleaned_simp<-lextale_agg_cleaned%>%select(lextale_score,participant.private.id)

#lextale italian

italian_lextale_tidy<-italian_lextale%>%
  select(participant.private.id,screen.name,zone.type,reaction.time,correct)%>%
  filter(zone.type=="response_keyboard")%>%
  filter(reaction.time>250)

italian_lextale_tidy%>%ggplot(aes(x=reaction.time))+geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

italian_lextale_agg<-italian_lextale_tidy%>%
  group_by(participant.private.id)%>%
  summarize(italian_lextale_score=mean(correct))

italian_lextale_agg_cleaned<-italian_lextale_agg%>%
  ungroup()%>%
  mutate(medians = median(italian_lextale_score),
         abs_distance = abs(medians-italian_lextale_score),
         mean_distance = mean(abs_distance),
         upper_mad = medians+(mean_distance*mad_standard),
         lower_mad = medians-(mean_distance*mad_standard))%>%
  filter(italian_lextale_score<upper_mad &italian_lextale_score>lower_mad)


italian_lextale_agg_cleaned%>%ggplot(aes(x=italian_lextale_score))+
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

removal[nrow(removal) + 1, ] <- c("italian_lextale_agg_participant", 
                                  nrow(italian_lextale_agg) - nrow(italian_lextale_agg_cleaned))

italian_lextale_agg_cleaned_simp<-italian_lextale_agg_cleaned%>%select(italian_lextale_score,participant.private.id)

#ASD scores

autism_tidy<-autism%>%
  select(participant.private.id,question.key,response)%>%
  filter(str_detect(question.key, "quantised"))%>%
  mutate(response=as.numeric(response))


agree<-c(1, 2, 4, 5, 6, 7, 9, 12, 13, 16, 18, 19, 20, 21, 
         22, 23, 26, 33, 35, 39, 41,42, 43, 45, 46)
disagree<-c(3, 8, 10, 11, 14, 15, 17, 24, 25, 27, 28, 29, 
            30, 31, 32,34, 36, 37, 38, 40, 44, 47, 48, 49, 50)
agr_key<-c("1","2")
dis_key<-c("3","4")

autism_cleaned<-autism_tidy%>%
  mutate(key_response = str_replace_all(question.key, "AQ", ""))%>%
  mutate(key_response = str_replace_all(key_response, "-quantised", ""))%>%
  mutate(points=case_when(key_response%in%agree & response %in% agr_key~1,
                          key_response%in%agree & response %in% dis_key~0,
                          key_response%in%disagree & response %in% dis_key~1,
                          key_response%in%disagree & response %in% agr_key~0))%>%
  group_by(participant.private.id)%>%
  summarize(autism_score=mean(points))

autism_cleaned%>%ggplot(aes(x=autism_score))+geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#minor task clean up above #experiment 1 behaviorial data clean up

s_exp1_cleaned<-s_exp1%>%
  select(participant.private.id,spreadsheet.row,response,real_1_distractor_0,zone.type,display,audio,
         q1_image,q2_image,q3_image,q4_image,
         id,grouping_id,sorting,audio)%>%
  filter(zone.type=="response_button_image")%>%
  mutate(audio=tolower(audio),
         word_play = str_remove(str_extract(audio, "^[^_]+"), "\\.wav"),
         word_play_start = substr(word_play, 1, 3),
         stress_play = str_remove(str_extract(audio, "(?<=_)[^_]+"), "\\.wav"),
         response_word = str_extract(response, "(?<=\\d)[a-zA-Z]+(?=\\.jpeg)"),
         response_thing = str_extract(response, "(?<=\\D)\\d{2}(?=[a-zA-Z]+\\.jpeg)"),
         correct=if_else(response_word==word_play,1,0))%>%
  mutate(stress_play=if_else(stress_play=="p","Penultimate","Anti-penultimate"),
         q1_word= str_extract(q1_image, "(?<=\\d)[a-zA-Z]+(?=\\.jpeg)"),
         q2_word= str_extract(q2_image, "(?<=\\d)[a-zA-Z]+(?=\\.jpeg)"),
         q3_word= str_extract(q3_image, "(?<=\\d)[a-zA-Z]+(?=\\.jpeg)"),
         q4_word= str_extract(q4_image, "(?<=\\d)[a-zA-Z]+(?=\\.jpeg)"))%>%
 mutate(target_quantile=case_when(q1_word==word_play~1,
                                 q2_word==word_play~2,
                                 q3_word==word_play~3,
                                 q4_word==word_play~4),
       competitor_quantile = case_when(
      q1_word != word_play & str_detect(q1_word,word_play_start) ~ 1,
      q2_word != word_play & str_detect(q2_word,word_play_start) ~ 2,
      q3_word != word_play & str_detect(q3_word,word_play_start) ~ 3,
      q4_word != word_play & str_detect(q4_word,word_play_start) ~ 4))%>%
  rowwise() %>%
  mutate(distractor_1_quantile = {
    possible_values <- setdiff(1:4, c(target_quantile, competitor_quantile))
    sample(possible_values, 1)
  })%>%
  mutate(distractor_2_quantile = 10-sum(distractor_1_quantile,target_quantile,competitor_quantile))%>%
  ungroup()

s_exp1_cleaned<-s_exp1_cleaned%>%
  mutate(target = case_when(target_quantile == 1 ~ q1_word,
                            target_quantile == 2 ~ q2_word,
                            target_quantile == 3 ~ q3_word,
                            target_quantile == 4 ~ q4_word),
         competitor = case_when(competitor_quantile == 1 ~ q1_word,
                                competitor_quantile == 2 ~ q2_word,
                                competitor_quantile == 3 ~ q3_word,
                                competitor_quantile == 4 ~ q4_word),
         dist_1 = case_when(distractor_1_quantile == 1 ~ q1_word,
                                  distractor_1_quantile == 2 ~ q2_word,
                                  distractor_1_quantile == 3 ~ q3_word,
                                  distractor_1_quantile == 4 ~ q4_word),
         dist_2 = case_when(distractor_2_quantile == 1 ~ q1_word,
                                  distractor_2_quantile == 2 ~ q2_word,
                                  distractor_2_quantile == 3 ~ q3_word,
                                  distractor_2_quantile == 4 ~ q4_word))
item_agg<-s_exp1_cleaned%>%group_by(word_play)%>%
  summarise(score=mean(correct))%>%
  ungroup()%>%
  mutate(medians = median(score),
         abs_distance = abs(medians-score),
         mean_distance = mean(abs_distance),
         upper_mad = medians+(mean_distance*mad_standard),
         lower_mad = medians-(mean_distance*mad_standard))

item_agg%>%
  ggplot(aes(x=factor(word_play),y=score))+
  geom_point()+
  geom_point(aes(x=factor(word_play),y=upper_mad),color="red")+
  geom_point(aes(x=factor(word_play),y=lower_mad),color="red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

item_agg_keep<-item_agg%>%
  filter(score<upper_mad & score>lower_mad)

removal[nrow(removal) + 1, ] <- c("exp1_item_removed", 
                                  length(unique(item_agg$word_play)) - length(unique(item_agg_keep$word_play)))

part_agg<-s_exp1_cleaned%>%group_by(participant.private.id)%>%
  summarise(score=mean(correct))%>%
  ungroup()%>%
    mutate(medians = median(score),
           abs_distance = abs(medians-score),
           mean_distance = mean(abs_distance),
           upper_mad = medians+(mean_distance*mad_standard),
           lower_mad = medians-(mean_distance*mad_standard))

part_agg%>%
  ggplot(aes(x=factor(participant.private.id),y=score))+
  geom_point()+
  geom_point(aes(x=factor(participant.private.id),y=upper_mad),color="red")+
  geom_point(aes(x=factor(participant.private.id),y=lower_mad),color="red")+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

part_agg_keep<-part_agg%>%
  filter(score<upper_mad & score>lower_mad)

removal[nrow(removal) + 1, ] <- c("exp1_part_removed", 
                                  length(unique(part_agg$participant.private.id)) - length(unique(part_agg_keep$participant.private.id)))

#s_exp1_cleaned<-s_exp1_cleaned%>%filter(participant.private.id%in%part_agg_keep$participant.private.id)
#s_exp1_cleaned<-s_exp1_cleaned%>%filter(word_play%in%item_agg_keep$word_play)

length(unique(s_exp1_cleaned$participant.private.id))
## [1] 50
length(unique(s_exp1_cleaned$word_play))
## [1] 128
list_o_word<-c('canapa','candido','celebre','codice','comico','eremo','estero','federa','forbice','fragola','impeto','lattice','loculo','macabro','mastice','missile','monito','panico','protesi','remora','salice','senape','tonaca','zigomo','canale','candito','celeste','codino','comizio','erede','esteta','fedele','forbito','fragore','impero','lattina','locusta','macaco','mastino','missiva','monile','panino','protesta','remoto','saliva','senato','tonale','zigote')

s_exp1_cleaned<-s_exp1_cleaned%>%
  filter(correct==1)%>%
  filter(word_play%in%list_o_word)

prac<-s_exp1_cleaned%>%ungroup()%>%group_by(real_1_distractor_0,stress_play,word_play)%>%count()

#exp 1 eye-tracking data clean up

s_exp1_et_cleaned<-s_exp1_et%>%
  select(participant_id,spreadsheet_row,type,time_stamp,
         time_elapsed,x_pred_normalised,y_pred_normalised,face_conf)%>%
  mutate(participant.private.id=participant_id,
         spreadsheet.row=spreadsheet_row)%>%
  select(!c(participant_id,spreadsheet_row))%>%
  filter(type=="prediction")
  
origin<-c(0,0)
s_exp1_data_all <- s_exp1_et_cleaned %>%
  left_join(s_exp1_cleaned)%>%
  na.omit()%>%
  filter(face_conf>.5)%>%
  mutate(x_pred_normalised=x_pred_normalised-.5,
         y_pred_normalised=y_pred_normalised-.5)%>%
  filter(x_pred_normalised<1&x_pred_normalised>-1)%>%
  filter(y_pred_normalised<1&y_pred_normalised>-1)%>%
  mutate(current_quadrant = case_when(
            x_pred_normalised < origin[1] & y_pred_normalised > origin[2] ~ q1_word,
            x_pred_normalised > origin[1] & y_pred_normalised > origin[2] ~ q2_word,
            x_pred_normalised < origin[1] & y_pred_normalised < origin[2] ~ q3_word,
            x_pred_normalised > origin[1] & y_pred_normalised < origin[2] ~ q4_word))%>%
  mutate(target_looks=if_else(current_quadrant==target,1,0),
         competitor_looks=if_else(current_quadrant==competitor,1,0),
         dist1_looks=if_else(current_quadrant==dist_1,1,0),
         dist2_looks=if_else(current_quadrant==dist_2,1,0))%>%
  group_by(participant.private.id,spreadsheet.row,stress_play)%>%
  mutate(time=time_stamp-min(time_stamp),
         time_rounded = round(time / 50) * 50)%>%
  mutate(time_rounded=time_rounded-1000)%>%
  mutate(time_rounded=time_rounded-200)%>%
  filter(time_rounded<1000&time_rounded>=0)
## Joining with `by = join_by(participant.private.id, spreadsheet.row)`
ets_exp1_all_agg<-s_exp1_data_all%>%
  group_by(stress_play,time_rounded)%>%
  summarise(mean_target_looks=mean(target_looks),
            mean_competitor_looks=mean(competitor_looks),
            mean_dist1_looks=mean(dist1_looks),
            mean_dist2_looks=mean(dist2_looks))%>%
  ungroup()%>%
  pivot_longer(cols=c(mean_target_looks:mean_dist2_looks),names_to = "names",values_to="values")
## `summarise()` has grouped output by 'stress_play'. You can override using the
## `.groups` argument.
penn_times<-c(300, 500, 650)
anti_times<-c(390, 530, 625)

df_1 <- data.frame(
  time_rounded = penn_times,
  values = 0.6,
  labels = c("ca", "LA", "ta")
)

df_2 <- data.frame(
  time_rounded = anti_times,
  values = 0.6,
  labels = c("CA", "la", "mo")
)

plot1<-ets_exp1_all_agg %>%
  filter(stress_play == "Penultimate") %>%
  ggplot(aes()) +
  geom_text(data=df_1,aes(label=labels,x = time_rounded, y = values),
            family = "Arial", size = 4, fontface = "bold")+
  geom_point(aes(x = time_rounded, y = values, color = names)) +
  geom_smooth(aes(x = time_rounded, y = values, color = names),se=FALSE)+
  scale_y_continuous(breaks=seq(0, 1, by = .25),
                     labels=seq(0, 1, by = .25))+
  scale_x_continuous(
    breaks = seq(min(ets_exp1_all_agg$time_rounded, na.rm = TRUE), 
                 max(ets_exp1_all_agg$time_rounded, na.rm = TRUE), by = 200),
    labels = seq(min(ets_exp1_all_agg$time_rounded, na.rm = TRUE), 
                 max(ets_exp1_all_agg$time_rounded, na.rm = TRUE), by = 200),
    minor_breaks = c(0,200,201, 396, 566, 699,800), 
    position = "bottom")+
  theme_minimal() +
  theme(
    axis.text.x.top = element_text(color = "black"),
    axis.text.x = element_text(color = "black"),
    panel.grid.major.x = element_line(color = "grey", size = 0.1),
    panel.grid.minor.x = element_line(linetype = "dotted", color = "black", size = 0.5),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_line(color = "grey", size = 0.1),
    legend.position = c(0.1, 0.9),
    legend.justification = c(0, 1),
    legend.background = element_rect(fill = "white", colour = "black"))+
  labs(x = NULL,y= "") +
  guides(color = guide_legend(title = "Looks to:"))+
  scale_color_manual(
    values = c("mean_target_looks" = "#c41230", "mean_competitor_looks" = "#007bc0", 
               "mean_dist1_looks" = "#A7A7A9", "mean_dist2_looks" = "#000000"),
    labels = c("Competitor", "Distractor", "Distractor","Target")
  )
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
plot2<-ets_exp1_all_agg %>%
  filter(stress_play == "Anti-penultimate") %>%
  ggplot(aes()) +
  geom_text(data=df_2,aes(label=labels,x = time_rounded, y = values),
            family = "Arial", size = 4, fontface = "bold")+
  geom_point(aes(x = time_rounded, y = values, color = names)) +
  geom_smooth(aes(x = time_rounded, y = values, color = names),se=FALSE)+
  scale_y_continuous(breaks=seq(0, 1, by = .25),
                     labels=seq(0, 1, by = .25))+
  scale_x_continuous(
    breaks = seq(min(ets_exp1_all_agg$time_rounded, na.rm = TRUE), 
                 max(ets_exp1_all_agg$time_rounded, na.rm = TRUE), by = 200),
    labels = seq(min(ets_exp1_all_agg$time_rounded, na.rm = TRUE), 
                 max(ets_exp1_all_agg$time_rounded, na.rm = TRUE), by = 200),
    minor_breaks = c(0,200,201, 499, 566, 669,800),
    position = "bottom")+
  theme_minimal() +
  theme(
    axis.text.x.top = element_text(color = "black"),
    axis.text.x = element_blank(),
    panel.grid.major.x = element_line(color = "grey", size = 0.1),
    panel.grid.minor.x = element_line(linetype = "dotted", color = "black", size = 0.5),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_line(color = "grey", size = 0.1),
    legend.position = "none",
    legend.justification = c(0, 1),
    legend.background = element_rect(fill = "white", colour = "black"))+
  labs(x = NULL,y= "") +
  guides(color = guide_legend(title = "Looks to:"))+
  scale_color_manual(
    values = c("mean_target_looks" = "#c41230", "mean_competitor_looks" = "#007bc0", 
               "mean_dist1_looks" = "#A7A7A9", "mean_dist2_looks" = "#000000"),
    labels = c("Competitor", "Distractor", "Distractor","Target")
  )

combined_plot <- plot_grid(plot1, plot2, ncol = 1)+
  draw_label("Anti-penultimate (bottom) and penultimate (top) items proportion of looks", x = 0.02, y = 0.5, angle = 90, size = 14)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
combined_plot

ggsave("../visuals/pen_vs_anti_pen.jpeg", plot = combined_plot, width = 6, height = 8, dpi = 300)
penn_times<-c(396, 566,699)
anti_times<-c(499, 566,669)

comp_targ<-s_exp1_data_all%>%
  ungroup()%>%
  filter(time_rounded<699)%>%
  mutate(syllable=case_when(time_rounded<200~0,
                            stress_play=="Penultimate" & time_rounded>200 &
                              time_rounded<penn_times[1]~1,
                            stress_play=="Penultimate" & time_rounded>=penn_times[1] &
                              time_rounded<mean(penn_times[2],penn_times[3]) ~2,
                            stress_play=="Penultimate" & time_rounded<=penn_times[3]~3,
                            stress_play=="Anti-penultimate" & time_rounded>200 &
                              time_rounded<anti_times[1]~1,
                            stress_play=="Anti-penultimate" & time_rounded>=anti_times[1] &
                              time_rounded<mean(anti_times[2],anti_times[3]) ~2,
                            stress_play=="Anti-penultimate" & time_rounded<=anti_times[3]~3))

comp_tar_longer<-comp_targ%>%
  pivot_longer(cols=c(target_looks:dist2_looks),names_to = "names",values_to="values")%>%
  mutate(syllable=as.factor(syllable),
         stress_play=as.factor(stress_play))


contrasts(comp_tar_longer$stress_play)<-c(.5,-.5)
contrasts(comp_tar_longer$stress_play)
##                  [,1]
## Anti-penultimate  0.5
## Penultimate      -0.5
comp_tar_longer$names<-as.factor(comp_tar_longer$names)
contrasts(comp_tar_longer$names)
##                  dist1_looks dist2_looks target_looks
## competitor_looks           0           0            0
## dist1_looks                1           0            0
## dist2_looks                0           1            0
## target_looks               0           0            1
#data$looks <- factor(data$looks, levels = c("dist1_looks", "dist2_looks", "target_looks", "competitor_looks"))


contrasts(factor(comp_tar_longer$names))
##                  dist1_looks dist2_looks target_looks
## competitor_looks           0           0            0
## dist1_looks                1           0            0
## dist2_looks                0           1            0
## target_looks               0           0            1
contrasts(comp_tar_longer$stress_play)
##                  [,1]
## Anti-penultimate  0.5
## Penultimate      -0.5
levels(comp_tar_longer$stress_play)<-c("Anti-penultimate","Penultimate")
levels(comp_tar_longer$stress_play)
## [1] "Anti-penultimate" "Penultimate"
comp_tar_model<-glm(values~names*stress_play*syllable,data=comp_tar_longer,family = "binomial")
comp_tar_model1<-glm(values~stress_play*names+syllable,data=comp_tar_longer,family = "binomial")
comp_tar_model2<-glm(values~stress_play+names*syllable,data=comp_tar_longer,family = "binomial")
comp_tar_model3<-glm(values~stress_play+names+syllable,data=comp_tar_longer,family = "binomial")
summary(comp_tar_model)
## 
## Call:
## glm(formula = values ~ names * stress_play * syllable, family = "binomial", 
##     data = comp_tar_longer)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9914  -0.7900  -0.6976  -0.1046   1.9024  
## 
## Coefficients:
##                                           Estimate Std. Error z value Pr(>|z|)
## (Intercept)                              -0.988512   0.025298 -39.074  < 2e-16
## namesdist1_looks                         -0.111691   0.036266  -3.080  0.00207
## namesdist2_looks                         -0.169471   0.036565  -4.635 3.57e-06
## namestarget_looks                        -0.165842   0.036528  -4.540 5.62e-06
## stress_play1                             -0.032196   0.050597  -0.636  0.52457
## syllable1                                 0.088697   0.036242   2.447  0.01439
## syllable2                                 0.065407   0.040449   1.617  0.10588
## syllable3                                -0.034831   0.039581  -0.880  0.37887
## namesdist1_looks:stress_play1            -0.010309   0.072532  -0.142  0.88698
## namesdist2_looks:stress_play1             0.182874   0.073129   2.501  0.01239
## namestarget_looks:stress_play1           -0.039573   0.073056  -0.542  0.58804
## namesdist1_looks:syllable1               -0.264449   0.052877  -5.001 5.70e-07
## namesdist2_looks:syllable1               -0.228770   0.053211  -4.299 1.71e-05
## namestarget_looks:syllable1               0.111961   0.051916   2.157  0.03104
## namesdist1_looks:syllable2               -0.576100   0.061398  -9.383  < 2e-16
## namesdist2_looks:syllable2               -0.419772   0.060773  -6.907 4.94e-12
## namestarget_looks:syllable2               0.550645   0.056570   9.734  < 2e-16
## namesdist1_looks:syllable3               -0.362953   0.058726  -6.180 6.39e-10
## namesdist2_looks:syllable3               -0.344982   0.059209  -5.826 5.66e-09
## namestarget_looks:syllable3               0.680661   0.055045  12.365  < 2e-16
## stress_play1:syllable1                    0.064657   0.072483   0.892  0.37238
## stress_play1:syllable2                    0.007727   0.080899   0.096  0.92390
## stress_play1:syllable3                    0.009700   0.079162   0.123  0.90248
## namesdist1_looks:stress_play1:syllable1  -0.063136   0.105754  -0.597  0.55050
## namesdist2_looks:stress_play1:syllable1  -0.197715   0.106423  -1.858  0.06319
## namestarget_looks:stress_play1:syllable1 -0.006079   0.103831  -0.059  0.95332
## namesdist1_looks:stress_play1:syllable2   0.033787   0.122796   0.275  0.78320
## namesdist2_looks:stress_play1:syllable2  -0.108914   0.121546  -0.896  0.37021
## namestarget_looks:stress_play1:syllable2  0.054556   0.113140   0.482  0.62967
## namesdist1_looks:stress_play1:syllable3   0.049859   0.117452   0.425  0.67120
## namesdist2_looks:stress_play1:syllable3   0.025804   0.118419   0.218  0.82751
## namestarget_looks:stress_play1:syllable3 -0.045756   0.110090  -0.416  0.67769
##                                             
## (Intercept)                              ***
## namesdist1_looks                         ** 
## namesdist2_looks                         ***
## namestarget_looks                        ***
## stress_play1                                
## syllable1                                *  
## syllable2                                   
## syllable3                                   
## namesdist1_looks:stress_play1               
## namesdist2_looks:stress_play1            *  
## namestarget_looks:stress_play1              
## namesdist1_looks:syllable1               ***
## namesdist2_looks:syllable1               ***
## namestarget_looks:syllable1              *  
## namesdist1_looks:syllable2               ***
## namesdist2_looks:syllable2               ***
## namestarget_looks:syllable2              ***
## namesdist1_looks:syllable3               ***
## namesdist2_looks:syllable3               ***
## namestarget_looks:syllable3              ***
## stress_play1:syllable1                      
## stress_play1:syllable2                      
## stress_play1:syllable3                      
## namesdist1_looks:stress_play1:syllable1     
## namesdist2_looks:stress_play1:syllable1  .  
## namestarget_looks:stress_play1:syllable1    
## namesdist1_looks:stress_play1:syllable2     
## namesdist2_looks:stress_play1:syllable2     
## namestarget_looks:stress_play1:syllable2    
## namesdist1_looks:stress_play1:syllable3     
## namesdist2_looks:stress_play1:syllable3     
## namestarget_looks:stress_play1:syllable3    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 120704  on 107323  degrees of freedom
## Residual deviance: 118904  on 107292  degrees of freedom
## AIC: 118968
## 
## Number of Fisher Scoring iterations: 4
summary(comp_tar_model1)
## 
## Call:
## glm(formula = values ~ stress_play * names + syllable, family = "binomial", 
##     data = comp_tar_longer)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.8745  -0.8084  -0.6884  -0.1173   1.8045  
## 
## Coefficients:
##                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    -9.536e-01  1.749e-02 -54.531  < 2e-16 ***
## stress_play1                   -5.629e-03  2.742e-02  -0.205 0.837364    
## namesdist1_looks               -3.767e-01  2.027e-02 -18.582  < 2e-16 ***
## namesdist2_looks               -3.916e-01  2.033e-02 -19.259  < 2e-16 ***
## namestarget_looks               1.336e-01  1.901e-02   7.029 2.07e-12 ***
## syllable1                       2.475e-15  1.862e-02   0.000 1.000000    
## syllable2                      -7.547e-15  2.045e-02   0.000 1.000000    
## syllable3                       4.123e-16  2.033e-02   0.000 1.000000    
## stress_play1:namesdist1_looks   2.827e-02  4.055e-02   0.697 0.485625    
## stress_play1:namesdist2_looks   1.342e-01  4.066e-02   3.301 0.000963 ***
## stress_play1:namestarget_looks -1.060e-01  3.802e-02  -2.789 0.005285 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 120704  on 107323  degrees of freedom
## Residual deviance: 119606  on 107313  degrees of freedom
## AIC: 119628
## 
## Number of Fisher Scoring iterations: 4
summary(comp_tar_model2)
## 
## Call:
## glm(formula = values ~ stress_play + names * syllable, family = "binomial", 
##     data = comp_tar_longer)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9705  -0.7954  -0.7001  -0.1027   1.8936  
## 
## Coefficients:
##                               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 -9.886e-01  2.530e-02 -39.079  < 2e-16 ***
## stress_play1                -2.565e-14  1.454e-02   0.000  1.00000    
## namesdist1_looks            -1.117e-01  3.626e-02  -3.080  0.00207 ** 
## namesdist2_looks            -1.671e-01  3.653e-02  -4.576 4.75e-06 ***
## namestarget_looks           -1.658e-01  3.652e-02  -4.538 5.67e-06 ***
## syllable1                    9.316e-02  3.559e-02   2.617  0.00886 ** 
## syllable2                    6.969e-02  3.905e-02   1.785  0.07430 .  
## syllable3                   -3.476e-02  3.958e-02  -0.878  0.37988    
## namesdist1_looks:syllable1  -2.742e-01  5.190e-02  -5.283 1.27e-07 ***
## namesdist2_looks:syllable1  -2.331e-01  5.214e-02  -4.470 7.81e-06 ***
## namestarget_looks:syllable1  1.058e-01  5.096e-02   2.075  0.03796 *  
## namesdist1_looks:syllable2  -5.801e-01  5.902e-02  -9.829  < 2e-16 ***
## namesdist2_looks:syllable2  -4.345e-01  5.856e-02  -7.420 1.17e-13 ***
## namestarget_looks:syllable2  5.480e-01  5.460e-02  10.037  < 2e-16 ***
## namesdist1_looks:syllable3  -3.629e-01  5.872e-02  -6.179 6.44e-10 ***
## namesdist2_looks:syllable3  -3.440e-01  5.913e-02  -5.818 5.95e-09 ***
## namestarget_looks:syllable3  6.807e-01  5.504e-02  12.368  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 120704  on 107323  degrees of freedom
## Residual deviance: 118928  on 107307  degrees of freedom
## AIC: 118962
## 
## Number of Fisher Scoring iterations: 4
summary(comp_tar_model3)
## 
## Call:
## glm(formula = values ~ stress_play + names + syllable, family = "binomial", 
##     data = comp_tar_longer)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.8543  -0.8074  -0.6850  -0.1259   1.7750  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)       -9.537e-01  1.749e-02 -54.537  < 2e-16 ***
## stress_play1       1.452e-14  1.450e-02   0.000        1    
## namesdist1_looks  -3.766e-01  2.027e-02 -18.574  < 2e-16 ***
## namesdist2_looks  -3.897e-01  2.032e-02 -19.181  < 2e-16 ***
## namestarget_looks  1.337e-01  1.901e-02   7.034 2.01e-12 ***
## syllable1         -3.122e-15  1.862e-02   0.000        1    
## syllable2         -1.352e-14  2.044e-02   0.000        1    
## syllable3         -1.436e-15  2.032e-02   0.000        1    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 120704  on 107323  degrees of freedom
## Residual deviance: 119643  on 107316  degrees of freedom
## AIC: 119659
## 
## Number of Fisher Scoring iterations: 4
anova(comp_tar_model,comp_tar_model1)
## Analysis of Deviance Table
## 
## Model 1: values ~ names * stress_play * syllable
## Model 2: values ~ stress_play * names + syllable
##   Resid. Df Resid. Dev  Df Deviance
## 1    107292     118904             
## 2    107313     119606 -21  -701.87
anova(comp_tar_model1,comp_tar_model2)
## Analysis of Deviance Table
## 
## Model 1: values ~ stress_play * names + syllable
## Model 2: values ~ stress_play + names * syllable
##   Resid. Df Resid. Dev Df Deviance
## 1    107313     119606            
## 2    107307     118928  6   678.12
anova(comp_tar_model2,comp_tar_model3)
## Analysis of Deviance Table
## 
## Model 1: values ~ stress_play + names * syllable
## Model 2: values ~ stress_play + names + syllable
##   Resid. Df Resid. Dev Df Deviance
## 1    107307     118928            
## 2    107316     119643 -9  -714.66
bic_model = BIC(comp_tar_model)
bic_model1 = BIC(comp_tar_model1)
bic_model2 = BIC(comp_tar_model2)
bic_model3 = BIC(comp_tar_model3)

bic_model
## [1] 119275.1
bic_model1
## [1] 119733.7
bic_model2
## [1] 119125.1
bic_model3
## [1] 119735.5

#analysis 1 (target vs competitor analysis)

library(broom)
tidy_model <- tidy(comp_tar_model2, effects = "fixed")%>%
  mutate(
    main = case_when(
      str_detect(term, ":") ~ 1,
      TRUE ~ 0))%>%
   mutate(
    looks = case_when(
      str_detect(term, "target") ~ "target",
      str_detect(term, "dist1") ~ "distractor_1",
      str_detect(term, "dist2") ~ "distractor_2",
      TRUE ~ "Base_looks"))%>%
  mutate(
    syllable = case_when(
      str_detect(term, "syllable1") ~ "1",
      str_detect(term, "syllable2") ~ "2",
      str_detect(term, "syllable3") ~ "3",
      TRUE ~ "0"))%>%
  mutate(
      term=str_replace(term, "syllable1","Syllable 1"),
      term=str_replace(term, "syllable2","Syllable 1.5"),
      term=str_replace(term, "syllable3","Syllable 2"),
      term=str_replace(term, "_looks",""),
      term=str_replace(term, "names","Looks "),
      term=str_replace(term, "stress_play","Stress"),
      term=str_replace(term, "Stress1","Stress Ante"))%>%
  arrange(main, syllable, looks) %>%
  mutate(
    row_number = row_number(),
    looks = as.factor(looks),
    Significance = case_when(
      p.value < 0.001 ~ "***",
      p.value < 0.01 ~ "**",
      p.value < 0.05 ~ "*",
      TRUE ~ ""))


colors <- c("Base_looks" = "#007bc0", "distractor_1" = "#A7A7A9", 
            "distractor_2" = "#000000", "target" = "#c41230")

pen_anti_model<-ggplot(tidy_model, aes(x = estimate, y = row_number,color=factor(looks))) +
  geom_point() +
  geom_errorbarh(aes(xmin = estimate - 1.96 * std.error, xmax = estimate + 1.96 * std.error), height = 0) +
  labs(title = , x = "Estimated Coefficients", y = "",color = "Looks to grouping") +
  theme_minimal()+
  geom_vline(xintercept = 0,color = "#007bc0")+
  scale_y_continuous(breaks = tidy_model$row_number, labels = tidy_model$term)+
  scale_color_manual(
    values = c( "Base_looks" = "#007bc0", "distractor_1" = "#A7A7A9", 
                "distractor_2" = "#000000","target" = "#c41230"),
    labels = c( "Base", "Distractor 1", "Distractor 2","Target")
  )+
  annotate("rect", xmin = -2.7, xmax = -1.45, ymin = -1.5, ymax = 17.5, fill = "white",color="grey") +
  geom_text(aes(y = row_number,x=-1.5, label = term, color = factor(looks)),hjust = 1, size = 4,angle=40,show.legend = FALSE)+
  xlim(-2.7, 1)+
  coord_flip()+
  theme(axis.text.y = element_text(angle = 15, hjust = 1),
        axis.text.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.minor.y = element_blank(),
        axis.title.y = element_text(hjust = .75, vjust = 0.0),
        axis.title.x = element_text(hjust = .0, vjust = 0),
        panel.border = element_blank(),
        legend.position = "bottom")+
  scale_x_continuous(breaks=seq(-1, 1, by = .5))+
  geom_text(aes(y = 1,x=-2.5, label = "Terms"),color = "black",hjust = 1, size = 5,angle=0,show.legend = FALSE)+
    geom_text(aes(y = row_number, x = -1.2, label = Significance), hjust = 0, vjust = .5,angle=90, color = "black", show.legend = FALSE)
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
pen_anti_model
## Warning in geom_text(aes(y = 1, x = -2.5, label = "Terms"), color = "black", : All aesthetics have length 1, but the data has 17 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
##   a single row.

#ggsave("../visuals/pen_vs_anti_pen_model.jpeg", plot = pen_anti_model, width = 10, height = 6.6, dpi = 300)

#analysis 2 (target distributional bias)

tar_longer<-comp_tar_longer%>%
  filter(names=="target_looks")%>%
  group_by(word_play,stress_play,syllable)%>%
  summarize(tar_prop_o_looks=mean(values))%>%
  filter(syllable==c(2,3))%>%ungroup()
## `summarise()` has grouped output by 'word_play', 'stress_play'. You can
## override using the `.groups` argument.
tar_ana1<-lme4::lmer(tar_prop_o_looks~stress_play*syllable+(1|word_play),data=tar_longer)
tar_ana2<-lme4::lmer(tar_prop_o_looks~stress_play+syllable+(1|word_play),data=tar_longer)
tar_ana3<-lme4::lmer(tar_prop_o_looks~syllable+(1|word_play),data=tar_longer)
tar_ana4<-lme4::lmer(tar_prop_o_looks~stress_play+(1|word_play),data=tar_longer)

summary(tar_ana1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: tar_prop_o_looks ~ stress_play * syllable + (1 | word_play)
##    Data: tar_longer
## 
## REML criterion at convergence: -210.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.59089 -0.43531  0.00725  0.55208  1.60197 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  word_play (Intercept) 0.006061 0.07785 
##  Residual              0.001919 0.04380 
## Number of obs: 96, groups:  word_play, 48
## 
## Fixed effects:
##                         Estimate Std. Error t value
## (Intercept)             0.368440   0.012893  28.576
## stress_play1           -0.004686   0.025787  -0.182
## syllable3               0.007875   0.008941   0.881
## stress_play1:syllable3 -0.021011   0.017882  -1.175
## 
## Correlation of Fixed Effects:
##             (Intr) strs_1 syllb3
## stress_ply1  0.000              
## syllable3   -0.347  0.000       
## strss_pl1:3  0.000 -0.347  0.000
summary(tar_ana2)
## Linear mixed model fit by REML ['lmerMod']
## Formula: tar_prop_o_looks ~ stress_play + syllable + (1 | word_play)
##    Data: tar_longer
## 
## REML criterion at convergence: -215
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.51246 -0.52886 -0.02352  0.52863  1.71578 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  word_play (Intercept) 0.006053 0.07780 
##  Residual              0.001934 0.04398 
## Number of obs: 96, groups:  word_play, 48
## 
## Fixed effects:
##               Estimate Std. Error t value
## (Intercept)   0.368440   0.012900  28.562
## stress_play1 -0.015191   0.024187  -0.628
## syllable3     0.007875   0.008977   0.877
## 
## Correlation of Fixed Effects:
##             (Intr) strs_1
## stress_ply1  0.000       
## syllable3   -0.348  0.000
summary(tar_ana3)
## Linear mixed model fit by REML ['lmerMod']
## Formula: tar_prop_o_looks ~ syllable + (1 | word_play)
##    Data: tar_longer
## 
## REML criterion at convergence: -220.3
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.53523 -0.51694 -0.01162  0.55191  1.69300 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  word_play (Intercept) 0.005963 0.07722 
##  Residual              0.001934 0.04398 
## Number of obs: 96, groups:  word_play, 48
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 0.368440   0.012826  28.725
## syllable3   0.007875   0.008977   0.877
## 
## Correlation of Fixed Effects:
##           (Intr)
## syllable3 -0.350
summary(tar_ana4)
## Linear mixed model fit by REML ['lmerMod']
## Formula: tar_prop_o_looks ~ stress_play + (1 | word_play)
##    Data: tar_longer
## 
## REML criterion at convergence: -221.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.55896 -0.47448  0.04201  0.48256  1.62967 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  word_play (Intercept) 0.006058 0.07783 
##  Residual              0.001925 0.04387 
## Number of obs: 96, groups:  word_play, 48
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept)   0.37238    0.01209  30.792
## stress_play1 -0.01519    0.02419  -0.628
## 
## Correlation of Fixed Effects:
##             (Intr)
## stress_ply1 0.000
#no significant results here- no bias

########analysis 3 (acoustics)

acoustic_data<-read.csv("acoustic_data_ppcc.csv")

aco_agg<-acoustic_data%>%
  group_by(stress,vowel)%>%
  na.omit()%>%
  summarize(mean_pitch=mean(mean_pitch),
            mean_amplitude=mean(mean_amplitude),
            mean_dur=mean(duration)*1000,
            mean_tilt=mean(mean_tilt))
## `summarise()` has grouped output by 'stress'. You can override using the
## `.groups` argument.
acoustic_data<-acoustic_data%>%
 mutate(
    stress_play = ifelse(stress == "antipenultimate", "Anti-penultimate", "Penultimate"),
    syllable = ifelse(vowel == "v1", 1, 3),
    syllable=as.factor(syllable),
    word=tolower(word))%>%
  select(-X)

penn_accoustic_data<-acoustic_data%>%
  filter(stress_play=="Penultimate")
ante_penn_accoustic_data<-acoustic_data%>%
  filter(stress_play!="Penultimate")

levels(penn_accoustic_data$syllable)
## [1] "1" "3"
# Create a function to run t-tests for each predictor
run_t_tests <- function(data, predictors, group_var) {
  results <- data.frame(Predictor = character(), T_value = numeric(), P_value = numeric(), Significance = character(), stringsAsFactors = FALSE)
  
  for (predictor in predictors) {
    cat("Running t-tests for predictor:", predictor, "\n")
    formula <- as.formula(paste(predictor, "~", group_var))
    t_test_result <- t.test(formula, data = data)
    
    # Determine significance level
    p_value <- t_test_result$p.value
    significance <- if (p_value < 0.001) {
      "***"
    } else if (p_value < 0.01) {
      "**"
    } else if (p_value < 0.05) {
      "*"
    } else {
      ""
    }
    
    # Append results to dataframe
    results <- rbind(results, data.frame(Predictor = predictor, T_value = t_test_result$statistic, P_value = p_value, Significance = significance))
  }
  return(results)
}


# Define the predictors and group variable
predictors <- c("mean_amplitude", "mean_pitch", "duration","mean_tilt")
group_var <- "syllable"

# Run t-tests
t_test_results_penn <- run_t_tests(penn_accoustic_data, predictors, group_var)
## Running t-tests for predictor: mean_amplitude 
## Running t-tests for predictor: mean_pitch 
## Running t-tests for predictor: duration 
## Running t-tests for predictor: mean_tilt
t_test_results_ante_penn <- run_t_tests(ante_penn_accoustic_data, predictors, group_var)
## Running t-tests for predictor: mean_amplitude 
## Running t-tests for predictor: mean_pitch 
## Running t-tests for predictor: duration 
## Running t-tests for predictor: mean_tilt
# Print the results
t_test_results_penn
##         Predictor     T_value      P_value Significance
## t  mean_amplitude   5.1787925 6.681214e-07          ***
## t1     mean_pitch   6.9926294 7.017645e-11          ***
## t2       duration -20.8351541 5.296237e-40          ***
## t3      mean_tilt   0.3321044 7.403028e-01
t_test_results_ante_penn
##         Predictor   T_value      P_value Significance
## t  mean_amplitude  7.507733 1.481562e-11          ***
## t1     mean_pitch 13.171503 2.130472e-22          ***
## t2       duration 13.111704 8.801271e-23          ***
## t3      mean_tilt  8.256081 2.160439e-13          ***

#analysis 3 target fixation analysis

ets_binary_analysis_syl_1<-comp_tar_longer%>%
  pivot_wider(names_from = names,values_from = values)%>%
  filter(syllable!=2)%>%
  filter(syllable!=0)%>%
  mutate(word=target)%>%
  left_join(acoustic_data)%>%
  filter(syllable==1)%>%
  mutate(mean_pitch_scaled = scale(mean_pitch),
         mean_amplitude_scaled = scale(mean_amplitude),
         mean_tilt_scaled = scale(mean_tilt),
         duration_scaled = scale(duration))
## Joining with `by = join_by(stress_play, syllable, word)`
ap_data_1<-ets_binary_analysis_syl_1%>%filter(stress_play!="Penultimate")
pp_data_1<-ets_binary_analysis_syl_1%>%filter(stress_play=="Penultimate")

mod_full<-glmer(target_looks~mean_pitch_scaled*mean_amplitude_scaled*mean_tilt_scaled*duration_scaled
                +(1|word),family=binomial, data= ap_data_1)
## boundary (singular) fit: see help('isSingular')
mod_full_2<-glmer(target_looks~mean_pitch_scaled*mean_amplitude_scaled*mean_tilt_scaled*duration_scaled
                +(1|word),family=binomial, data= pp_data_1)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00802168 (tol = 0.002, component 1)
ets_binary_analysis_syl_3<-comp_tar_longer%>%
  pivot_wider(names_from = names,values_from = values)%>%
  filter(syllable!=2)%>%
  filter(syllable!=0)%>%
  mutate(word=target)%>%
  left_join(acoustic_data)%>%
  filter(syllable==3)%>%
  mutate(mean_pitch_scaled = scale(mean_pitch),
         mean_amplitude_scaled = scale(mean_amplitude),
         mean_tilt_scaled = scale(mean_tilt),
         duration_scaled = scale(duration))
## Joining with `by = join_by(stress_play, syllable, word)`
ap_data_3<-ets_binary_analysis_syl_3%>%filter(stress_play!="Penultimate")
pp_data_3<-ets_binary_analysis_syl_3%>%filter(stress_play=="Penultimate")


mod_full_1_3<-glmer(target_looks~mean_pitch_scaled*mean_amplitude_scaled*mean_tilt_scaled*duration_scaled
                +(1|word),family=binomial, data= ap_data_3)
## boundary (singular) fit: see help('isSingular')
mod_full_2_3<-glmer(target_looks~mean_pitch_scaled*mean_amplitude_scaled*mean_tilt_scaled*duration_scaled
                +(1|word),family=binomial, data= pp_data_3)
## boundary (singular) fit: see help('isSingular')
summary(mod_full)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## target_looks ~ mean_pitch_scaled * mean_amplitude_scaled * mean_tilt_scaled *  
##     duration_scaled + (1 | word)
##    Data: ap_data_1
## 
##      AIC      BIC   logLik deviance df.resid 
##   5508.4   5618.2  -2737.2   5474.4     4718 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.7721 -0.6542 -0.5760  1.3696  2.4321 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  word   (Intercept) 0        0       
## Number of obs: 4735, groups:  word, 23
## 
## Fixed effects:
##                                                                          Estimate
## (Intercept)                                                              -1.35736
## mean_pitch_scaled                                                         0.05746
## mean_amplitude_scaled                                                     0.11058
## mean_tilt_scaled                                                          0.43284
## duration_scaled                                                           0.27806
## mean_pitch_scaled:mean_amplitude_scaled                                   0.20027
## mean_pitch_scaled:mean_tilt_scaled                                        0.08976
## mean_amplitude_scaled:mean_tilt_scaled                                    0.65550
## mean_pitch_scaled:duration_scaled                                         0.10331
## mean_amplitude_scaled:duration_scaled                                    -0.12135
## mean_tilt_scaled:duration_scaled                                         -0.29873
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled                  0.21292
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled                   0.30921
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled                       -0.59884
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled                   -0.89933
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled -0.75100
##                                                                          Std. Error
## (Intercept)                                                                 0.13196
## mean_pitch_scaled                                                           0.14945
## mean_amplitude_scaled                                                       0.22998
## mean_tilt_scaled                                                            0.13597
## duration_scaled                                                             0.09177
## mean_pitch_scaled:mean_amplitude_scaled                                     0.28133
## mean_pitch_scaled:mean_tilt_scaled                                          0.16733
## mean_amplitude_scaled:mean_tilt_scaled                                      0.17107
## mean_pitch_scaled:duration_scaled                                           0.10915
## mean_amplitude_scaled:duration_scaled                                       0.17742
## mean_tilt_scaled:duration_scaled                                            0.14827
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled                    0.13804
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled                     0.21837
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled                          0.15568
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled                      0.20104
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled    0.26374
##                                                                          z value
## (Intercept)                                                              -10.286
## mean_pitch_scaled                                                          0.384
## mean_amplitude_scaled                                                      0.481
## mean_tilt_scaled                                                           3.183
## duration_scaled                                                            3.030
## mean_pitch_scaled:mean_amplitude_scaled                                    0.712
## mean_pitch_scaled:mean_tilt_scaled                                         0.536
## mean_amplitude_scaled:mean_tilt_scaled                                     3.832
## mean_pitch_scaled:duration_scaled                                          0.947
## mean_amplitude_scaled:duration_scaled                                     -0.684
## mean_tilt_scaled:duration_scaled                                          -2.015
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled                   1.542
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled                    1.416
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled                        -3.847
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled                    -4.473
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled  -2.847
##                                                                          Pr(>|z|)
## (Intercept)                                                               < 2e-16
## mean_pitch_scaled                                                        0.700645
## mean_amplitude_scaled                                                    0.630659
## mean_tilt_scaled                                                         0.001456
## duration_scaled                                                          0.002446
## mean_pitch_scaled:mean_amplitude_scaled                                  0.476545
## mean_pitch_scaled:mean_tilt_scaled                                       0.591692
## mean_amplitude_scaled:mean_tilt_scaled                                   0.000127
## mean_pitch_scaled:duration_scaled                                        0.343882
## mean_amplitude_scaled:duration_scaled                                    0.494018
## mean_tilt_scaled:duration_scaled                                         0.043928
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled                 0.122957
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled                  0.156769
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled                       0.000120
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled                    7.7e-06
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 0.004407
##                                                                             
## (Intercept)                                                              ***
## mean_pitch_scaled                                                           
## mean_amplitude_scaled                                                       
## mean_tilt_scaled                                                         ** 
## duration_scaled                                                          ** 
## mean_pitch_scaled:mean_amplitude_scaled                                     
## mean_pitch_scaled:mean_tilt_scaled                                          
## mean_amplitude_scaled:mean_tilt_scaled                                   ***
## mean_pitch_scaled:duration_scaled                                           
## mean_amplitude_scaled:duration_scaled                                       
## mean_tilt_scaled:duration_scaled                                         *  
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled                    
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled                     
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled                       ***
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled                   ***
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
summary(mod_full_2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## target_looks ~ mean_pitch_scaled * mean_amplitude_scaled * mean_tilt_scaled *  
##     duration_scaled + (1 | word)
##    Data: pp_data_1
## 
##      AIC      BIC   logLik deviance df.resid 
##   3358.3   3459.6  -1662.2   3324.3     2842 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.8893 -0.6752 -0.5326  1.2260  2.2371 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  word   (Intercept) 0.01422  0.1193  
## Number of obs: 2859, groups:  word, 24
## 
## Fixed effects:
##                                                                          Estimate
## (Intercept)                                                              -1.20754
## mean_pitch_scaled                                                         0.39125
## mean_amplitude_scaled                                                    -2.40163
## mean_tilt_scaled                                                         -0.05164
## duration_scaled                                                          -0.09770
## mean_pitch_scaled:mean_amplitude_scaled                                  -2.49809
## mean_pitch_scaled:mean_tilt_scaled                                       -0.15619
## mean_amplitude_scaled:mean_tilt_scaled                                   -3.19909
## mean_pitch_scaled:duration_scaled                                         0.63661
## mean_amplitude_scaled:duration_scaled                                    -2.45907
## mean_tilt_scaled:duration_scaled                                         -0.08173
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled                 -1.30310
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled                  -2.53978
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled                       -0.06819
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled                   -3.15979
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled -1.28875
##                                                                          Std. Error
## (Intercept)                                                                 0.74381
## mean_pitch_scaled                                                           0.80779
## mean_amplitude_scaled                                                       3.59167
## mean_tilt_scaled                                                            0.94677
## duration_scaled                                                             0.81161
## mean_pitch_scaled:mean_amplitude_scaled                                     3.67626
## mean_pitch_scaled:mean_tilt_scaled                                          1.04027
## mean_amplitude_scaled:mean_tilt_scaled                                      3.34152
## mean_pitch_scaled:duration_scaled                                           0.86995
## mean_amplitude_scaled:duration_scaled                                       3.71385
## mean_tilt_scaled:duration_scaled                                            0.88847
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled                    3.45882
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled                     3.73623
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled                          0.97625
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled                      3.23502
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled    3.27218
##                                                                          z value
## (Intercept)                                                               -1.623
## mean_pitch_scaled                                                          0.484
## mean_amplitude_scaled                                                     -0.669
## mean_tilt_scaled                                                          -0.055
## duration_scaled                                                           -0.120
## mean_pitch_scaled:mean_amplitude_scaled                                   -0.680
## mean_pitch_scaled:mean_tilt_scaled                                        -0.150
## mean_amplitude_scaled:mean_tilt_scaled                                    -0.957
## mean_pitch_scaled:duration_scaled                                          0.732
## mean_amplitude_scaled:duration_scaled                                     -0.662
## mean_tilt_scaled:duration_scaled                                          -0.092
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled                  -0.377
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled                   -0.680
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled                        -0.070
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled                    -0.977
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled  -0.394
##                                                                          Pr(>|z|)
## (Intercept)                                                                 0.104
## mean_pitch_scaled                                                           0.628
## mean_amplitude_scaled                                                       0.504
## mean_tilt_scaled                                                            0.957
## duration_scaled                                                             0.904
## mean_pitch_scaled:mean_amplitude_scaled                                     0.497
## mean_pitch_scaled:mean_tilt_scaled                                          0.881
## mean_amplitude_scaled:mean_tilt_scaled                                      0.338
## mean_pitch_scaled:duration_scaled                                           0.464
## mean_amplitude_scaled:duration_scaled                                       0.508
## mean_tilt_scaled:duration_scaled                                            0.927
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled                    0.706
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled                     0.497
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled                          0.944
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled                      0.329
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled    0.694
## 
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00802168 (tol = 0.002, component 1)
summary(mod_full_1_3)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## target_looks ~ mean_pitch_scaled * mean_amplitude_scaled * mean_tilt_scaled *  
##     duration_scaled + (1 | word)
##    Data: ap_data_3
## 
##      AIC      BIC   logLik deviance df.resid 
##   3480.8   3581.0  -1723.4   3446.8     2660 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.0057 -0.7770 -0.6576  1.2314  1.8784 
## 
## Random effects:
##  Groups Name        Variance  Std.Dev. 
##  word   (Intercept) 2.595e-14 1.611e-07
## Number of obs: 2677, groups:  word, 23
## 
## Fixed effects:
##                                                                          Estimate
## (Intercept)                                                               -6.6805
## mean_pitch_scaled                                                         -7.3133
## mean_amplitude_scaled                                                     -7.5312
## mean_tilt_scaled                                                           6.8617
## duration_scaled                                                           -5.3622
## mean_pitch_scaled:mean_amplitude_scaled                                   -7.9596
## mean_pitch_scaled:mean_tilt_scaled                                         7.7238
## mean_amplitude_scaled:mean_tilt_scaled                                    -0.3461
## mean_pitch_scaled:duration_scaled                                         -6.4490
## mean_amplitude_scaled:duration_scaled                                     -7.2997
## mean_tilt_scaled:duration_scaled                                           4.8232
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled                   0.2661
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled                   -7.8219
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled                         5.4536
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled                    -2.1295
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled  -1.8007
##                                                                          Std. Error
## (Intercept)                                                                  2.0526
## mean_pitch_scaled                                                            2.8747
## mean_amplitude_scaled                                                        2.3077
## mean_tilt_scaled                                                             7.2037
## duration_scaled                                                              1.9872
## mean_pitch_scaled:mean_amplitude_scaled                                      2.3649
## mean_pitch_scaled:mean_tilt_scaled                                           8.6769
## mean_amplitude_scaled:mean_tilt_scaled                                       5.8816
## mean_pitch_scaled:duration_scaled                                            2.7610
## mean_amplitude_scaled:duration_scaled                                        2.2047
## mean_tilt_scaled:duration_scaled                                             6.6331
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled                     8.5534
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled                      2.3461
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled                           7.9323
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled                       5.3915
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled     7.7251
##                                                                          z value
## (Intercept)                                                               -3.255
## mean_pitch_scaled                                                         -2.544
## mean_amplitude_scaled                                                     -3.264
## mean_tilt_scaled                                                           0.953
## duration_scaled                                                           -2.698
## mean_pitch_scaled:mean_amplitude_scaled                                   -3.366
## mean_pitch_scaled:mean_tilt_scaled                                         0.890
## mean_amplitude_scaled:mean_tilt_scaled                                    -0.059
## mean_pitch_scaled:duration_scaled                                         -2.336
## mean_amplitude_scaled:duration_scaled                                     -3.311
## mean_tilt_scaled:duration_scaled                                           0.727
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled                   0.031
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled                   -3.334
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled                         0.688
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled                    -0.395
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled  -0.233
##                                                                          Pr(>|z|)
## (Intercept)                                                              0.001136
## mean_pitch_scaled                                                        0.010959
## mean_amplitude_scaled                                                    0.001100
## mean_tilt_scaled                                                         0.340833
## duration_scaled                                                          0.006968
## mean_pitch_scaled:mean_amplitude_scaled                                  0.000764
## mean_pitch_scaled:mean_tilt_scaled                                       0.373379
## mean_amplitude_scaled:mean_tilt_scaled                                   0.953073
## mean_pitch_scaled:duration_scaled                                        0.019506
## mean_amplitude_scaled:duration_scaled                                    0.000930
## mean_tilt_scaled:duration_scaled                                         0.467142
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled                 0.975177
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled                  0.000856
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled                       0.491754
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled                   0.692860
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled 0.815682
##                                                                             
## (Intercept)                                                              ** 
## mean_pitch_scaled                                                        *  
## mean_amplitude_scaled                                                    ** 
## mean_tilt_scaled                                                            
## duration_scaled                                                          ** 
## mean_pitch_scaled:mean_amplitude_scaled                                  ***
## mean_pitch_scaled:mean_tilt_scaled                                          
## mean_amplitude_scaled:mean_tilt_scaled                                      
## mean_pitch_scaled:duration_scaled                                        *  
## mean_amplitude_scaled:duration_scaled                                    ***
## mean_tilt_scaled:duration_scaled                                            
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled                    
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled                  ***
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled                          
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled                      
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
summary(mod_full_2_3)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## target_looks ~ mean_pitch_scaled * mean_amplitude_scaled * mean_tilt_scaled *  
##     duration_scaled + (1 | word)
##    Data: pp_data_3
## 
##      AIC      BIC   logLik deviance df.resid 
##   3670.9   3771.6  -1818.4   3636.9     2744 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.0486 -0.8021 -0.6854  1.1248  1.6062 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  word   (Intercept) 1.03e-13 3.21e-07
## Number of obs: 2761, groups:  word, 24
## 
## Fixed effects:
##                                                                          Estimate
## (Intercept)                                                              -0.52557
## mean_pitch_scaled                                                         0.23788
## mean_amplitude_scaled                                                     0.25235
## mean_tilt_scaled                                                          0.37625
## duration_scaled                                                          -0.06645
## mean_pitch_scaled:mean_amplitude_scaled                                  -0.25924
## mean_pitch_scaled:mean_tilt_scaled                                       -0.32735
## mean_amplitude_scaled:mean_tilt_scaled                                   -0.08979
## mean_pitch_scaled:duration_scaled                                         0.10687
## mean_amplitude_scaled:duration_scaled                                    -0.09348
## mean_tilt_scaled:duration_scaled                                         -0.38639
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled                  0.39126
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled                  -0.25763
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled                        0.59576
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled                    0.09401
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled -0.33147
##                                                                          Std. Error
## (Intercept)                                                                 0.43540
## mean_pitch_scaled                                                           0.28385
## mean_amplitude_scaled                                                       0.51760
## mean_tilt_scaled                                                            0.50162
## duration_scaled                                                             0.34138
## mean_pitch_scaled:mean_amplitude_scaled                                     0.44601
## mean_pitch_scaled:mean_tilt_scaled                                          0.32803
## mean_amplitude_scaled:mean_tilt_scaled                                      0.48944
## mean_pitch_scaled:duration_scaled                                           0.21838
## mean_amplitude_scaled:duration_scaled                                       0.44712
## mean_tilt_scaled:duration_scaled                                            0.48212
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled                    0.34162
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled                     0.63138
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled                          0.54071
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled                      0.33543
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled    0.31459
##                                                                          z value
## (Intercept)                                                               -1.207
## mean_pitch_scaled                                                          0.838
## mean_amplitude_scaled                                                      0.488
## mean_tilt_scaled                                                           0.750
## duration_scaled                                                           -0.195
## mean_pitch_scaled:mean_amplitude_scaled                                   -0.581
## mean_pitch_scaled:mean_tilt_scaled                                        -0.998
## mean_amplitude_scaled:mean_tilt_scaled                                    -0.183
## mean_pitch_scaled:duration_scaled                                          0.489
## mean_amplitude_scaled:duration_scaled                                     -0.209
## mean_tilt_scaled:duration_scaled                                          -0.801
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled                   1.145
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled                   -0.408
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled                         1.102
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled                     0.280
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled  -1.054
##                                                                          Pr(>|z|)
## (Intercept)                                                                 0.227
## mean_pitch_scaled                                                           0.402
## mean_amplitude_scaled                                                       0.626
## mean_tilt_scaled                                                            0.453
## duration_scaled                                                             0.846
## mean_pitch_scaled:mean_amplitude_scaled                                     0.561
## mean_pitch_scaled:mean_tilt_scaled                                          0.318
## mean_amplitude_scaled:mean_tilt_scaled                                      0.854
## mean_pitch_scaled:duration_scaled                                           0.625
## mean_amplitude_scaled:duration_scaled                                       0.834
## mean_tilt_scaled:duration_scaled                                            0.423
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled                    0.252
## mean_pitch_scaled:mean_amplitude_scaled:duration_scaled                     0.683
## mean_pitch_scaled:mean_tilt_scaled:duration_scaled                          0.271
## mean_amplitude_scaled:mean_tilt_scaled:duration_scaled                      0.779
## mean_pitch_scaled:mean_amplitude_scaled:mean_tilt_scaled:duration_scaled    0.292
## 
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

#competitor looks analyses

#
#comp_mod_full<-glmer(competitor_looks~mean_pitch_scaled*mean_amplitude_scaled*mean_tilt_scaled*duration_scaled
#                +(1|word),family=binomial, data= ap_data)

#comp_mod_full_2<-glmer(competitor_looks~mean_pitch_scaled*mean_amplitude_scaled*mean_tilt_scaled*duration_scaled
#                +(1|word),family=binomial, data= pp_data)

#comp_mod_full_1_3<-glmer(competitor_looks~mean_pitch_scaled*mean_amplitude_scaled*mean_tilt_scaled*duration_scaled
#                +(1|word),family=binomial, data= ap_data_3)

#comp_mod_full_2_3<-glmer(competitor_looks~mean_pitch_scaled*mean_amplitude_scaled*mean_tilt_scaled*duration_scaled
 #               +(1|word),family=binomial, data= pp_data_3)

#summary(comp_mod_full)
#summary(comp_mod_full_2)

#summary(comp_mod_full_1_3)
#summary(comp_mod_full_2_3)

#individual differences for target and competitor fixations

comp_tar_longer_id<-comp_tar_longer%>%
  left_join(battery_agg_wider_simp)%>%
  left_join(digit_cleaned_simp)%>%
  left_join(lextale_agg_cleaned_simp)%>%
  left_join(italian_lextale_agg_cleaned_simp)%>%
  left_join(autism_cleaned)%>%
  filter(syllable!=0)%>%
  filter(names=="target_looks")
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
comp_tar_longer_id_penn<-comp_tar_longer_id%>%filter(stress_play=="Penultimate")
comp_tar_longer_id_a_penn<-comp_tar_longer_id%>%filter(stress_play!="Penultimate")

lASSO Function

fit_models <- function(data, stress_filter) {
  # Filter the data
  data_filtered <- data %>% filter(stress_play == stress_filter)
  data_filtered <- na.omit(data_filtered)
  
  # Create the design matrix and response vector
  x <- model.matrix(values ~ syllable * (dprime_battery_up_dur + dprime_battery_up_ptich + dprime_battery_up_risetime + dprime_battery_up_formants + lextale_score + autism_score + italian_lextale_score + lextale_score), data = data_filtered)[, -1]
  y <- data_filtered$values
  
  # Set up lambda values for LASSO regression
  lambda_search_space <- 10^seq(10, -2, length = 100)
  
  # Fit the LASSO regression model
  lasso_model <- glmnet(x, y, alpha = 1, lambda = lambda_search_space, family = "binomial")
  
  # Cross-validation to find the best lambda
  cv <- cv.glmnet(x, y, alpha = 1, family = "binomial")
  best_lambda <- cv$lambda.min
  
  # Extract coefficients for the best lambda
  coef_lasso_best <- coef(lasso_model, s = best_lambda)
  
  # Convert coefficients to data frame for visualization
  coef_df <- as.data.frame(as.matrix(coef_lasso_best))
  coef_df$Predictor <- rownames(coef_df)
  rownames(coef_df) <- NULL
  
  # Visualize the coefficients
  coef_plot <- ggplot(coef_df, aes(x = Predictor, y = s1)) +
    geom_bar(stat = "identity") +
    coord_flip() +
    labs(title = paste("LASSO Regression Coefficients at Best Lambda (", round(best_lambda, 4), ")", sep = ""),
         x = "Predictor",
         y = "Coefficient Value") +
    theme_minimal()
  print(coef_plot)
  
  # Select significant variables based on LASSO coefficients
  significant_vars <- coef_df %>%
    filter(s1 != 0) %>%
    pull(Predictor)
  
  # Clean up the variable names by replacing syllable1, syllable2, and syllable3 with syllable
  significant_vars <- gsub("syllable[123]", "syllable", significant_vars)
  
  # Remove duplicate variables
  significant_vars <- unique(significant_vars)
  
  # Prepare formula for GLMM using significant variables
  significant_vars <- significant_vars[significant_vars != "(Intercept)"] # Remove intercept if present
  formula_str <- paste("values ~", paste(significant_vars, collapse = " + "), "+ (1|word_play)")
  
  # Convert the string to a formula object
  formula <- as.formula(formula_str)
  
  # Fit GLMM with selected variables
  model_selected <- glmer(formula, data = data_filtered, family = "binomial")
  
  # Print the summary of the GLMM
  print(summary(model_selected))
  
  # Return the summary and the plot
  list(summary = summary(model_selected), plot = coef_plot)
}
comp_tar_longer_id <- comp_tar_longer %>%
  left_join(battery_agg_wider_simp) %>%
  left_join(digit_cleaned_simp) %>%
  left_join(lextale_agg_cleaned_simp) %>%
  left_join(italian_lextale_agg_cleaned_simp) %>%
  left_join(autism_cleaned) %>%
  filter(syllable != 0) %>%
  filter(names == "target_looks")
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
# Fit models for Penultimate stress
summary_penultimate <- fit_models(comp_tar_longer_id, "Penultimate")
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0081802 (tol = 0.002, component 1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## values ~ lextale_score + italian_lextale_score + syllable:dprime_battery_up_dur +  
##     syllable:lextale_score + syllable:autism_score + syllable:italian_lextale_score +  
##     (1 | word_play)
##    Data: data_filtered
## 
##      AIC      BIC   logLik deviance df.resid 
##  10866.7  10965.6  -5419.4  10838.7     8635 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3423 -0.7284 -0.5918  1.1495  2.3940 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  word_play (Intercept) 0.1483   0.3851  
## Number of obs: 8649, groups:  word_play, 24
## 
## Fixed effects:
##                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     -5.02711    1.22079  -4.118 3.82e-05 ***
## lextale_score                   -0.04220    0.38805  -0.109 0.913405    
## italian_lextale_score            4.23226    1.27848   3.310 0.000932 ***
## syllable1:dprime_battery_up_dur -0.31647    0.46465  -0.681 0.495811    
## syllable2:dprime_battery_up_dur -0.08322    0.38092  -0.218 0.827054    
## syllable3:dprime_battery_up_dur -0.80535    0.43533  -1.850 0.064314 .  
## lextale_score:syllable2          0.83900    0.50270   1.669 0.095119 .  
## lextale_score:syllable3          0.78831    0.53056   1.486 0.137334    
## syllable1:autism_score          -0.22055    0.31420  -0.702 0.482711    
## syllable2:autism_score           0.45215    0.25938   1.743 0.081296 .  
## syllable3:autism_score           0.47694    0.29560   1.613 0.106645    
## italian_lextale_score:syllable2 -0.52111    0.44830  -1.162 0.245072    
## italian_lextale_score:syllable3 -0.47563    0.47568  -1.000 0.317364    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(summary(model_selected), correlation=TRUE)  or
##     vcov(summary(model_selected))        if you need it

## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0081802 (tol = 0.002, component 1)
# Fit models for non-Penultimate stress
summary_anti_penultimate <- fit_models(comp_tar_longer_id, "Anti-penultimate")
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00211687 (tol = 0.002, component 1)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## values ~ syllable + dprime_battery_up_dur + lextale_score + syllable:dprime_battery_up_dur +  
##     syllable:dprime_battery_up_ptich + syllable:lextale_score +  
##     (1 | word_play)
##    Data: data_filtered
## 
##      AIC      BIC   logLik deviance df.resid 
##  10870.6  10962.7  -5422.3  10844.6     8806 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3772 -0.7079 -0.5853  1.1847  2.6029 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  word_play (Intercept) 0.07606  0.2758  
## Number of obs: 8819, groups:  word_play, 24
## 
## Fixed effects:
##                                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                        -1.5634     0.2436  -6.419 1.37e-10 ***
## syllable2                          -0.4597     0.4323  -1.064   0.2875    
## syllable3                          -0.5004     0.3779  -1.324   0.1854    
## dprime_battery_up_dur              -1.7392     0.3613  -4.814 1.48e-06 ***
## lextale_score                       0.6349     0.2927   2.169   0.0301 *  
## syllable2:dprime_battery_up_dur    -0.7630     0.6721  -1.135   0.2562    
## syllable3:dprime_battery_up_dur    -0.4225     0.5793  -0.729   0.4658    
## syllable1:dprime_battery_up_ptich   0.8879     0.2138   4.154 3.27e-05 ***
## syllable2:dprime_battery_up_ptich   0.7265     0.3539   2.053   0.0401 *  
## syllable3:dprime_battery_up_ptich   0.4800     0.2869   1.673   0.0943 .  
## syllable2:lextale_score             0.9517     0.5350   1.779   0.0753 .  
## syllable3:lextale_score             0.9875     0.4674   2.113   0.0346 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                        (Intr) syllb2 syllb3 dpr___ lxtl_s
## syllable2              -0.533                            
## syllable3              -0.610  0.343                     
## dprm_bttr__             0.134 -0.076 -0.086              
## lextale_scr            -0.940  0.529  0.606  0.046       
## syllbl2:dprm_bttry_p_d -0.072  0.150  0.046 -0.539 -0.024
## syllbl3:dprm_bttry_p_d -0.083  0.047  0.147 -0.624 -0.029
## syllbl1:___            -0.038  0.021  0.024 -0.198  0.117
## syllbl2:dprm_bttry_p_p  0.000 -0.059  0.001  0.000  0.000
## syllbl3:dprm_bttry_p_p  0.000  0.000 -0.057  0.000  0.000
## syllbl2:lx_             0.514 -0.965 -0.331 -0.025 -0.546
## syllbl3:lx_             0.589 -0.331 -0.966 -0.029 -0.627
##                        syllbl2:dprm_bttry_p_d syllbl3:dprm_bttry_p_d s1:___
## syllable2                                                                  
## syllable3                                                                  
## dprm_bttr__                                                                
## lextale_scr                                                                
## syllbl2:dprm_bttry_p_d                                                     
## syllbl3:dprm_bttry_p_d  0.337                                              
## syllbl1:___             0.107                  0.124                       
## syllbl2:dprm_bttry_p_p -0.200                  0.000                  0.000
## syllbl3:dprm_bttry_p_p  0.000                 -0.180                  0.000
## syllbl2:lx_             0.030                  0.016                 -0.064
## syllbl3:lx_             0.016                  0.030                 -0.073
##                        syllbl2:dprm_bttry_p_p syllbl3:dprm_bttry_p_p syl2:_
## syllable2                                                                  
## syllable3                                                                  
## dprm_bttr__                                                                
## lextale_scr                                                                
## syllbl2:dprm_bttry_p_d                                                     
## syllbl3:dprm_bttry_p_d                                                     
## syllbl1:___                                                                
## syllbl2:dprm_bttry_p_p                                                     
## syllbl3:dprm_bttry_p_p  0.000                                              
## syllbl2:lx_             0.132                  0.000                       
## syllbl3:lx_            -0.001                  0.128                  0.342
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00211687 (tol = 0.002, component 1)
summary_penultimate$summary
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## values ~ lextale_score + italian_lextale_score + syllable:dprime_battery_up_dur +  
##     syllable:lextale_score + syllable:autism_score + syllable:italian_lextale_score +  
##     (1 | word_play)
##    Data: data_filtered
## 
##      AIC      BIC   logLik deviance df.resid 
##  10866.7  10965.6  -5419.4  10838.7     8635 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3423 -0.7284 -0.5918  1.1495  2.3940 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  word_play (Intercept) 0.1483   0.3851  
## Number of obs: 8649, groups:  word_play, 24
## 
## Fixed effects:
##                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     -5.02711    1.22079  -4.118 3.82e-05 ***
## lextale_score                   -0.04220    0.38805  -0.109 0.913405    
## italian_lextale_score            4.23226    1.27848   3.310 0.000932 ***
## syllable1:dprime_battery_up_dur -0.31647    0.46465  -0.681 0.495811    
## syllable2:dprime_battery_up_dur -0.08322    0.38092  -0.218 0.827054    
## syllable3:dprime_battery_up_dur -0.80535    0.43533  -1.850 0.064314 .  
## lextale_score:syllable2          0.83900    0.50270   1.669 0.095119 .  
## lextale_score:syllable3          0.78831    0.53056   1.486 0.137334    
## syllable1:autism_score          -0.22055    0.31420  -0.702 0.482711    
## syllable2:autism_score           0.45215    0.25938   1.743 0.081296 .  
## syllable3:autism_score           0.47694    0.29560   1.613 0.106645    
## italian_lextale_score:syllable2 -0.52111    0.44830  -1.162 0.245072    
## italian_lextale_score:syllable3 -0.47563    0.47568  -1.000 0.317364    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0081802 (tol = 0.002, component 1)
summary_anti_penultimate$summary
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## values ~ syllable + dprime_battery_up_dur + lextale_score + syllable:dprime_battery_up_dur +  
##     syllable:dprime_battery_up_ptich + syllable:lextale_score +  
##     (1 | word_play)
##    Data: data_filtered
## 
##      AIC      BIC   logLik deviance df.resid 
##  10870.6  10962.7  -5422.3  10844.6     8806 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3772 -0.7079 -0.5853  1.1847  2.6029 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  word_play (Intercept) 0.07606  0.2758  
## Number of obs: 8819, groups:  word_play, 24
## 
## Fixed effects:
##                                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                        -1.5634     0.2436  -6.419 1.37e-10 ***
## syllable2                          -0.4597     0.4323  -1.064   0.2875    
## syllable3                          -0.5004     0.3779  -1.324   0.1854    
## dprime_battery_up_dur              -1.7392     0.3613  -4.814 1.48e-06 ***
## lextale_score                       0.6349     0.2927   2.169   0.0301 *  
## syllable2:dprime_battery_up_dur    -0.7630     0.6721  -1.135   0.2562    
## syllable3:dprime_battery_up_dur    -0.4225     0.5793  -0.729   0.4658    
## syllable1:dprime_battery_up_ptich   0.8879     0.2138   4.154 3.27e-05 ***
## syllable2:dprime_battery_up_ptich   0.7265     0.3539   2.053   0.0401 *  
## syllable3:dprime_battery_up_ptich   0.4800     0.2869   1.673   0.0943 .  
## syllable2:lextale_score             0.9517     0.5350   1.779   0.0753 .  
## syllable3:lextale_score             0.9875     0.4674   2.113   0.0346 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                        (Intr) syllb2 syllb3 dpr___ lxtl_s
## syllable2              -0.533                            
## syllable3              -0.610  0.343                     
## dprm_bttr__             0.134 -0.076 -0.086              
## lextale_scr            -0.940  0.529  0.606  0.046       
## syllbl2:dprm_bttry_p_d -0.072  0.150  0.046 -0.539 -0.024
## syllbl3:dprm_bttry_p_d -0.083  0.047  0.147 -0.624 -0.029
## syllbl1:___            -0.038  0.021  0.024 -0.198  0.117
## syllbl2:dprm_bttry_p_p  0.000 -0.059  0.001  0.000  0.000
## syllbl3:dprm_bttry_p_p  0.000  0.000 -0.057  0.000  0.000
## syllbl2:lx_             0.514 -0.965 -0.331 -0.025 -0.546
## syllbl3:lx_             0.589 -0.331 -0.966 -0.029 -0.627
##                        syllbl2:dprm_bttry_p_d syllbl3:dprm_bttry_p_d s1:___
## syllable2                                                                  
## syllable3                                                                  
## dprm_bttr__                                                                
## lextale_scr                                                                
## syllbl2:dprm_bttry_p_d                                                     
## syllbl3:dprm_bttry_p_d  0.337                                              
## syllbl1:___             0.107                  0.124                       
## syllbl2:dprm_bttry_p_p -0.200                  0.000                  0.000
## syllbl3:dprm_bttry_p_p  0.000                 -0.180                  0.000
## syllbl2:lx_             0.030                  0.016                 -0.064
## syllbl3:lx_             0.016                  0.030                 -0.073
##                        syllbl2:dprm_bttry_p_p syllbl3:dprm_bttry_p_p syl2:_
## syllable2                                                                  
## syllable3                                                                  
## dprm_bttr__                                                                
## lextale_scr                                                                
## syllbl2:dprm_bttry_p_d                                                     
## syllbl3:dprm_bttry_p_d                                                     
## syllbl1:___                                                                
## syllbl2:dprm_bttry_p_p                                                     
## syllbl3:dprm_bttry_p_p  0.000                                              
## syllbl2:lx_             0.132                  0.000                       
## syllbl3:lx_            -0.001                  0.128                  0.342
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00211687 (tol = 0.002, component 1)
summary_penultimate$plot

summary_anti_penultimate$plot

library(conflicted)
ets_exp1_all_agg_part_wider<-comp_tar_longer%>%
  pivot_wider(names_from = names,values_from = values)%>%
  filter(syllable!=0)%>%
  group_by(participant.private.id,stress_play,syllable)%>%
  summarise(mean_target_looks=mean(target_looks),
            mean_competitor_looks=mean(competitor_looks),
            mean_dist1_looks=mean(dist1_looks),
            mean_dist2_looks=mean(dist2_looks))%>%
  mutate(proportion_o_looks=mean_target_looks-mean_competitor_looks)%>%
  left_join(battery_agg_wider_simp)%>%
  left_join(digit_cleaned_simp)%>%
  left_join(lextale_agg_cleaned_simp)%>%
  left_join(italian_lextale_agg_cleaned_simp)%>%
  left_join(autism_cleaned)


ets_exp1_all_agg_part_wider%>%
  ggplot(aes(x=dprime_battery_up_dur,y=proportion_o_looks))+
  geom_point()+
  geom_smooth()+
  facet_grid(syllable~stress_play)

ets_exp1_all_agg_part_wider%>%
  ggplot(aes(x=dprime_battery_up_formants,y=proportion_o_looks))+
  geom_point()+
    geom_smooth()+
  facet_grid(syllable~stress_play)

ets_exp1_all_agg_part_wider%>%
  ggplot(aes(x=dprime_battery_up_ptich,y=proportion_o_looks))+
  geom_point()+
    geom_smooth()+
  facet_grid(syllable~stress_play)

ets_exp1_all_agg_part_wider%>%
  ggplot(aes(x=dprime_battery_up_risetime,y=proportion_o_looks))+
  geom_point()+
    geom_smooth()+
  facet_grid(syllable~stress_play)

ets_exp1_all_agg_part_wider%>%
  ggplot(aes(x=autism_score,y=proportion_o_looks))+
  geom_point()+
    geom_smooth()+
  facet_grid(syllable~stress_play)

ets_exp1_all_agg_part_wider%>%
  ggplot(aes(x=lextale_score,y=proportion_o_looks))+
  geom_point()+
    geom_smooth()+
  facet_grid(syllable~stress_play)

ets_exp1_all_agg_part_wider%>%
  ggplot(aes(x=italian_lextale_score,y=proportion_o_looks))+
  geom_point()+
    geom_smooth()+
  facet_grid(syllable~stress_play)

ets_exp1_all_agg_part_wider%>%
  ggplot(aes(x=mean_working_memory,y=proportion_o_looks))+
  geom_point()+
    geom_smooth()+
  facet_grid(syllable~stress_play)

ets_exp1_all_agg_part_wider%>%
  ggplot(aes(y=proportion_o_looks,x=dprime_battery_up_ptich,size=dprime_battery_up_dur,alpha=.2))+
  geom_point()+
    geom_smooth()+
  facet_grid(syllable~stress_play)

ets_exp1_all_agg_part_wider%>%
  ggplot(aes(y=mean_target_looks,x=autism_score,size=italian_lextale_score,color=syllable,alpha=.2))+
  geom_point()+
    geom_smooth()+
  facet_grid(.~stress_play)

write.csv(ets_exp1_all_agg_part_wider, "../../ppcc_iflu_data/ets_exp1_all_agg_part_wider.csv")
fit_models_acoustic_id <- function(data) {
  # Filter the data
  data_filtered <- na.omit(data)
  
  # Create the design matrix and response vector
  x <- model.matrix(target_looks ~ (dprime_battery_up_dur + 
                                      dprime_battery_up_ptich + 
                                      dprime_battery_up_risetime + 
                                      dprime_battery_up_formants + 
                                      lextale_score + autism_score + 
                                      italian_lextale_score + 
                                      lextale_score)*
                      (mean_pitch_scaled+
                         duration_scaled+
                         mean_amplitude_scaled+
                         mean_tilt_scaled), data = data_filtered)[, -1]
  y <- data_filtered$target_looks
  
  # Set up lambda values for LASSO regression
  lambda_search_space <- 10^seq(10, -2, length = 100)
  
  # Fit the LASSO regression model
  lasso_model <- glmnet(x, y, alpha = 1, lambda = lambda_search_space, family = "binomial")
  
  # Cross-validation to find the best lambda
  cv <- cv.glmnet(x, y, alpha = 1, family = "binomial")
  best_lambda <- cv$lambda.min
  
  # Extract coefficients for the best lambda
  coef_lasso_best <- coef(lasso_model, s = best_lambda)
  
  # Convert coefficients to data frame for visualization
  coef_df <- as.data.frame(as.matrix(coef_lasso_best))
  coef_df$Predictor <- rownames(coef_df)
  rownames(coef_df) <- NULL
  
  # Visualize the coefficients
  coef_plot <- ggplot(coef_df, aes(x = Predictor, y = s1)) +
    geom_bar(stat = "identity") +
    coord_flip() +
    labs(title = paste("LASSO Regression Coefficients at Best Lambda (", round(best_lambda, 4), ")", sep = ""),
         x = "Predictor",
         y = "Coefficient Value") +
    theme_minimal()
  print(coef_plot)
  
  # Select significant variables based on LASSO coefficients
  significant_vars <- coef_df %>%
    filter(s1 != 0) %>%
    pull(Predictor)
  
  # Clean up the variable names by replacing syllable1, syllable2, and syllable3 with syllable
  significant_vars <- gsub("syllable[123]", "syllable", significant_vars)
  
  # Remove duplicate variables
  significant_vars <- unique(significant_vars)
  
  # Prepare formula for GLMM using significant variables
  significant_vars <- significant_vars[significant_vars != "(Intercept)"] # Remove intercept if present
  formula_str <- paste("target_looks ~", paste(significant_vars, collapse = " + "), "+ (1|word)")
  
  # Convert the string to a formula object
  formula <- as.formula(formula_str)
  formula
  # Fit GLMM with selected variables
  model_selected <- glmer(formula, data = data_filtered, family = "binomial")
  
  # Print the summary of the GLMM
  print(summary(model_selected))
  
  # Return the summary and the plot
  list(summary = summary(model_selected), plot = coef_plot)
}
# Prepare datasets
ap_data_1_id <- ap_data_1 %>%
  left_join(battery_agg_wider_simp) %>%
  left_join(digit_cleaned_simp) %>%
  left_join(lextale_agg_cleaned_simp) %>%
  left_join(italian_lextale_agg_cleaned_simp) %>%
  left_join(autism_cleaned)
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
ap_data_3_id <- ap_data_3 %>%
  left_join(battery_agg_wider_simp) %>%
  left_join(digit_cleaned_simp) %>%
  left_join(lextale_agg_cleaned_simp) %>%
  left_join(italian_lextale_agg_cleaned_simp) %>%
  left_join(autism_cleaned)
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
pp_data_1_id <- pp_data_1 %>%
  left_join(battery_agg_wider_simp) %>%
  left_join(digit_cleaned_simp) %>%
  left_join(lextale_agg_cleaned_simp) %>%
  left_join(italian_lextale_agg_cleaned_simp) %>%
  left_join(autism_cleaned)
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
pp_data_3_id <- pp_data_3 %>%
  left_join(battery_agg_wider_simp) %>%
  left_join(digit_cleaned_simp) %>%
  left_join(lextale_agg_cleaned_simp) %>%
  left_join(italian_lextale_agg_cleaned_simp) %>%
  left_join(autism_cleaned)
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
## Joining with `by = join_by(participant.private.id)`
result_ap_data_1 <- fit_models_acoustic_id(ap_data_1_id)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.011968 (tol = 0.002, component 1)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: target_looks ~ dprime_battery_up_dur + dprime_battery_up_ptich +  
##     lextale_score + italian_lextale_score + dprime_battery_up_dur:duration_scaled +  
##     dprime_battery_up_dur:mean_tilt_scaled + dprime_battery_up_ptich:mean_pitch_scaled +  
##     dprime_battery_up_risetime:mean_pitch_scaled + dprime_battery_up_risetime:mean_tilt_scaled +  
##     autism_score:mean_tilt_scaled + (1 | word)
##    Data: data_filtered
## 
##      AIC      BIC   logLik deviance df.resid 
##   5082.1   5158.6  -2529.0   5058.1     4345 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.1859 -0.6473 -0.5480  1.2329  2.9375 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  word   (Intercept) 0.08389  0.2896  
## Number of obs: 4357, groups:  word, 23
## 
## Fixed effects:
##                                              Estimate Std. Error z value
## (Intercept)                                   -4.7168     1.7104  -2.758
## dprime_battery_up_dur                         -1.0733     0.4461  -2.406
## dprime_battery_up_ptich                        0.9954     0.2214   4.496
## lextale_score                                  0.6437     0.3072   2.095
## italian_lextale_score                          3.1846     1.7753   1.794
## dprime_battery_up_dur:duration_scaled         -0.8175     0.4079  -2.004
## dprime_battery_up_dur:mean_tilt_scaled        -0.1021     0.3737  -0.273
## dprime_battery_up_ptich:mean_pitch_scaled      0.2781     0.2303   1.207
## mean_pitch_scaled:dprime_battery_up_risetime   0.2858     0.3211   0.890
## mean_tilt_scaled:dprime_battery_up_risetime   -0.6141     0.2922  -2.101
## mean_tilt_scaled:autism_score                  0.2614     0.1680   1.556
##                                              Pr(>|z|)    
## (Intercept)                                   0.00582 ** 
## dprime_battery_up_dur                         0.01614 *  
## dprime_battery_up_ptich                      6.93e-06 ***
## lextale_score                                 0.03614 *  
## italian_lextale_score                         0.07284 .  
## dprime_battery_up_dur:duration_scaled         0.04507 *  
## dprime_battery_up_dur:mean_tilt_scaled        0.78463    
## dprime_battery_up_ptich:mean_pitch_scaled     0.22728    
## mean_pitch_scaled:dprime_battery_up_risetime  0.37338    
## mean_tilt_scaled:dprime_battery_up_risetime   0.03561 *  
## mean_tilt_scaled:autism_score                 0.11969    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                   (Intr) dprm_bttry_p_d dprm_bttry_p_p lxtl_s itln__ dp___:_
## dprm_bttry_p_d     0.059                                                    
## dprm_bttry_p_p    -0.023 -0.156                                             
## lextale_scr        0.057  0.034          0.114                              
## itln_lxtl_s       -0.989 -0.042          0.017         -0.195               
## dprm_bt__:_       -0.001 -0.515         -0.012          0.003  0.001        
## dprm_bttry_p_d:__ -0.023 -0.140          0.009          0.006  0.016 -0.172 
## dprm_bttry_p_p:__  0.004 -0.018         -0.066         -0.006 -0.004  0.007 
## mn_ptc_:___       -0.024 -0.012         -0.008         -0.019  0.026  0.001 
## mn_tlt_:___       -0.045  0.007         -0.110         -0.054  0.053  0.017 
## mn_tlt_sc:_       -0.075 -0.063         -0.023         -0.004  0.063 -0.037 
##                   dprm_bttry_p_d:__ dprm_bttry_p_p:__ mn_p_:___ mn_t_:___
## dprm_bttry_p_d                                                           
## dprm_bttry_p_p                                                           
## lextale_scr                                                              
## itln_lxtl_s                                                              
## dprm_bt__:_                                                              
## dprm_bttry_p_d:__                                                        
## dprm_bttry_p_p:__  0.047                                                 
## mn_ptc_:___       -0.019            -0.436                               
## mn_tlt_:___       -0.246            -0.014             0.191             
## mn_tlt_sc:_        0.463             0.003             0.015     0.021   
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.011968 (tol = 0.002, component 1)
result_pp_data_1 <- fit_models_acoustic_id(pp_data_1_id)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00760328 (tol = 0.002, component 1)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: target_looks ~ dprime_battery_up_risetime + mean_pitch_scaled +  
##     duration_scaled + mean_amplitude_scaled + dprime_battery_up_dur:mean_amplitude_scaled +  
##     dprime_battery_up_ptich:mean_pitch_scaled + dprime_battery_up_ptich:duration_scaled +  
##     lextale_score:mean_tilt_scaled + autism_score:mean_tilt_scaled +  
##     italian_lextale_score:mean_amplitude_scaled + (1 | word)
##    Data: data_filtered
## 
##      AIC      BIC   logLik deviance df.resid 
##   3089.8   3160.3  -1532.9   3065.8     2622 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.9948 -0.6322 -0.5346  1.1321  2.3729 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  word   (Intercept) 0.1263   0.3554  
## Number of obs: 2634, groups:  word, 24
## 
## Fixed effects:
##                                             Estimate Std. Error z value
## (Intercept)                                 -1.04585    0.29216  -3.580
## dprime_battery_up_risetime                  -0.39073    0.40509  -0.965
## mean_pitch_scaled                           -0.06378    0.09418  -0.677
## duration_scaled                             -0.11509    0.27094  -0.425
## mean_amplitude_scaled                        0.58582    1.80642   0.324
## mean_amplitude_scaled:dprime_battery_up_dur  0.25248    0.38781   0.651
## mean_pitch_scaled:dprime_battery_up_ptich    0.11475    0.27933   0.411
## duration_scaled:dprime_battery_up_ptich      0.19011    0.30430   0.625
## lextale_score:mean_tilt_scaled               0.18939    0.19524   0.970
## mean_tilt_scaled:autism_score                0.18958    0.30221   0.627
## mean_amplitude_scaled:italian_lextale_score -0.66010    1.84596  -0.358
##                                             Pr(>|z|)    
## (Intercept)                                 0.000344 ***
## dprime_battery_up_risetime                  0.334766    
## mean_pitch_scaled                           0.498288    
## duration_scaled                             0.670999    
## mean_amplitude_scaled                       0.745712    
## mean_amplitude_scaled:dprime_battery_up_dur 0.515024    
## mean_pitch_scaled:dprime_battery_up_ptich   0.681214    
## duration_scaled:dprime_battery_up_ptich     0.532131    
## lextale_score:mean_tilt_scaled              0.332025    
## mean_tilt_scaled:autism_score               0.530449    
## mean_amplitude_scaled:italian_lextale_score 0.720650    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) dpr___ mn_pt_ drtn_s mn_mp_ mn_m_:___ mn_p_:___ d_:___
## dprm_bttr__  0.114                                                       
## mn_ptch_scl -0.093 -0.003                                                
## duratn_scld  0.925  0.052 -0.192                                         
## mn_mpltd_sc -0.014 -0.028  0.004 -0.009                                  
## mn_mpl_:___  0.001 -0.010  0.014  0.002  0.046                           
## mn_ptc_:___ -0.004 -0.009  0.417 -0.037  0.013  0.043                    
## drtn_sc:___  0.040  0.322 -0.090  0.160  0.003  0.001    -0.223          
## lxtl_scr:__  0.221 -0.005  0.085  0.130  0.084 -0.027    -0.049     0.068
## mn_tlt_sc:_  0.056  0.012  0.053  0.021 -0.104  0.041     0.060    -0.073
## mn_mplt_:__  0.001  0.028 -0.001  0.000 -0.999 -0.019    -0.012    -0.003
##             lx_:__ mn__:_
## dprm_bttr__              
## mn_ptch_scl              
## duratn_scld              
## mn_mpltd_sc              
## mn_mpl_:___              
## mn_ptc_:___              
## drtn_sc:___              
## lxtl_scr:__              
## mn_tlt_sc:_ -0.700       
## mn_mplt_:__ -0.096  0.103
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00760328 (tol = 0.002, component 1)
result_ap_data_3 <- fit_models_acoustic_id(ap_data_3_id)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0039877 (tol = 0.002, component 1)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## target_looks ~ dprime_battery_up_dur + lextale_score + dprime_battery_up_dur:mean_pitch_scaled +  
##     dprime_battery_up_dur:mean_tilt_scaled + dprime_battery_up_ptich:mean_pitch_scaled +  
##     dprime_battery_up_ptich:mean_tilt_scaled + dprime_battery_up_risetime:mean_pitch_scaled +  
##     lextale_score:mean_pitch_scaled + (1 | word)
##    Data: data_filtered
## 
##      AIC      BIC   logLik deviance df.resid 
##   3195.0   3253.1  -1587.5   3175.0     2465 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4416 -0.7549 -0.6334  1.1529  1.9937 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  word   (Intercept) 0.03065  0.1751  
## Number of obs: 2475, groups:  word, 23
## 
## Fixed effects:
##                                              Estimate Std. Error z value
## (Intercept)                                   -1.9924     0.3051  -6.531
## dprime_battery_up_dur                         -2.3587     1.4371  -1.641
## lextale_score                                  0.6420     0.4704   1.365
## dprime_battery_up_dur:mean_pitch_scaled       -0.2635     1.6336  -0.161
## dprime_battery_up_dur:mean_tilt_scaled         0.5143     0.5085   1.011
## mean_pitch_scaled:dprime_battery_up_ptich     -0.7025     0.3616  -1.942
## mean_tilt_scaled:dprime_battery_up_ptich       0.4910     0.3569   1.376
## mean_pitch_scaled:dprime_battery_up_risetime  -0.7644     0.4579  -1.669
## lextale_score:mean_pitch_scaled               -1.1386     0.3560  -3.199
##                                              Pr(>|z|)    
## (Intercept)                                  6.52e-11 ***
## dprime_battery_up_dur                         0.10074    
## lextale_score                                 0.17236    
## dprime_battery_up_dur:mean_pitch_scaled       0.87183    
## dprime_battery_up_dur:mean_tilt_scaled        0.31184    
## mean_pitch_scaled:dprime_battery_up_ptich     0.05208 .  
## mean_tilt_scaled:dprime_battery_up_ptich      0.16885    
## mean_pitch_scaled:dprime_battery_up_risetime  0.09509 .  
## lextale_score:mean_pitch_scaled               0.00138 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                             (Intr) dpr___ lxtl_s dprm_bttry_p_dr:mn_p_
## dprm_bttr__                  0.046                                    
## lextale_scr                 -0.756  0.417                             
## dprm_bttry_p_dr:mn_p_        0.003  0.942  0.427                      
## dprm_bttry_p_dr:mn_t_       -0.005  0.327  0.054  0.263               
## mn_ptch_scld:dprm_bttry_p_p  0.105  0.017 -0.065 -0.043               
## mn_tlt_:___                  0.006 -0.102 -0.106 -0.069               
## mn_ptch_scld:dprm_bttry_p_r -0.119 -0.003  0.091 -0.055               
## lxtl_scr:__                  0.009  0.648  0.608  0.678               
##                             dprm_bttry_p_dr:mn_t_ mn_ptch_scld:dprm_bttry_p_p
## dprm_bttr__                                                                  
## lextale_scr                                                                  
## dprm_bttry_p_dr:mn_p_                                                        
## dprm_bttry_p_dr:mn_t_                                                        
## mn_ptch_scld:dprm_bttry_p_p  0.080                                           
## mn_tlt_:___                 -0.517                -0.163                     
## mn_ptch_scld:dprm_bttry_p_r  0.011                -0.325                     
## lxtl_scr:__                  0.069                 0.116                     
##                             mn_t_:___ mn_ptch_scld:dprm_bttry_p_r
## dprm_bttr__                                                      
## lextale_scr                                                      
## dprm_bttry_p_dr:mn_p_                                            
## dprm_bttry_p_dr:mn_t_                                            
## mn_ptch_scld:dprm_bttry_p_p                                      
## mn_tlt_:___                                                      
## mn_ptch_scld:dprm_bttry_p_r  0.000                               
## lxtl_scr:__                 -0.143     0.022                     
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0039877 (tol = 0.002, component 1)
result_pp_data_3 <- fit_models_acoustic_id(pp_data_3_id)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0557315 (tol = 0.002, component 1)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: target_looks ~ dprime_battery_up_dur + lextale_score + autism_score +  
##     italian_lextale_score + dprime_battery_up_dur:mean_pitch_scaled +  
##     dprime_battery_up_formants:duration_scaled + autism_score:mean_pitch_scaled +  
##     (1 | word)
##    Data: data_filtered
## 
##      AIC      BIC   logLik deviance df.resid 
##   3392.5   3445.1  -1687.2   3374.5     2552 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.2893 -0.7930 -0.6725  1.1483  1.7532 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  word   (Intercept) 0.08264  0.2875  
## Number of obs: 2561, groups:  word, 24
## 
## Fixed effects:
##                                            Estimate Std. Error z value Pr(>|z|)
## (Intercept)                                 -7.6840     2.2153  -3.469 0.000523
## dprime_battery_up_dur                       -0.8787     0.6379  -1.377 0.168388
## lextale_score                                0.7306     0.3624   2.016 0.043791
## autism_score                                 0.3281     0.3595   0.913 0.361505
## italian_lextale_score                        6.5459     2.2736   2.879 0.003989
## dprime_battery_up_dur:mean_pitch_scaled     -0.2193     0.5289  -0.415 0.678439
## dprime_battery_up_formants:duration_scaled   0.6316     0.3416   1.849 0.064511
## autism_score:mean_pitch_scaled               0.1938     0.2296   0.844 0.398641
##                                               
## (Intercept)                                ***
## dprime_battery_up_dur                         
## lextale_score                              *  
## autism_score                                  
## italian_lextale_score                      ** 
## dprime_battery_up_dur:mean_pitch_scaled       
## dprime_battery_up_formants:duration_scaled .  
## autism_score:mean_pitch_scaled                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) dpr___ lxtl_s atsm_s itln__ d___:__ dp___:_
## dprm_bttr__  0.063                                            
## lextale_scr  0.062  0.034                                     
## autism_scor -0.243  0.352  0.016                              
## itln_lxtl_s -0.989 -0.064 -0.190  0.193                       
## dprm_b__:__ -0.020 -0.701 -0.006 -0.329  0.023                
## dprm_bt__:_ -0.195 -0.305  0.065 -0.099  0.207  0.125         
## atsm_scr:__  0.003 -0.394 -0.010 -0.513 -0.003  0.589  -0.038 
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0557315 (tol = 0.002, component 1)
# Print summaries and display plots
print(result_ap_data_1$summary)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: target_looks ~ dprime_battery_up_dur + dprime_battery_up_ptich +  
##     lextale_score + italian_lextale_score + dprime_battery_up_dur:duration_scaled +  
##     dprime_battery_up_dur:mean_tilt_scaled + dprime_battery_up_ptich:mean_pitch_scaled +  
##     dprime_battery_up_risetime:mean_pitch_scaled + dprime_battery_up_risetime:mean_tilt_scaled +  
##     autism_score:mean_tilt_scaled + (1 | word)
##    Data: data_filtered
## 
##      AIC      BIC   logLik deviance df.resid 
##   5082.1   5158.6  -2529.0   5058.1     4345 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.1859 -0.6473 -0.5480  1.2329  2.9375 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  word   (Intercept) 0.08389  0.2896  
## Number of obs: 4357, groups:  word, 23
## 
## Fixed effects:
##                                              Estimate Std. Error z value
## (Intercept)                                   -4.7168     1.7104  -2.758
## dprime_battery_up_dur                         -1.0733     0.4461  -2.406
## dprime_battery_up_ptich                        0.9954     0.2214   4.496
## lextale_score                                  0.6437     0.3072   2.095
## italian_lextale_score                          3.1846     1.7753   1.794
## dprime_battery_up_dur:duration_scaled         -0.8175     0.4079  -2.004
## dprime_battery_up_dur:mean_tilt_scaled        -0.1021     0.3737  -0.273
## dprime_battery_up_ptich:mean_pitch_scaled      0.2781     0.2303   1.207
## mean_pitch_scaled:dprime_battery_up_risetime   0.2858     0.3211   0.890
## mean_tilt_scaled:dprime_battery_up_risetime   -0.6141     0.2922  -2.101
## mean_tilt_scaled:autism_score                  0.2614     0.1680   1.556
##                                              Pr(>|z|)    
## (Intercept)                                   0.00582 ** 
## dprime_battery_up_dur                         0.01614 *  
## dprime_battery_up_ptich                      6.93e-06 ***
## lextale_score                                 0.03614 *  
## italian_lextale_score                         0.07284 .  
## dprime_battery_up_dur:duration_scaled         0.04507 *  
## dprime_battery_up_dur:mean_tilt_scaled        0.78463    
## dprime_battery_up_ptich:mean_pitch_scaled     0.22728    
## mean_pitch_scaled:dprime_battery_up_risetime  0.37338    
## mean_tilt_scaled:dprime_battery_up_risetime   0.03561 *  
## mean_tilt_scaled:autism_score                 0.11969    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                   (Intr) dprm_bttry_p_d dprm_bttry_p_p lxtl_s itln__ dp___:_
## dprm_bttry_p_d     0.059                                                    
## dprm_bttry_p_p    -0.023 -0.156                                             
## lextale_scr        0.057  0.034          0.114                              
## itln_lxtl_s       -0.989 -0.042          0.017         -0.195               
## dprm_bt__:_       -0.001 -0.515         -0.012          0.003  0.001        
## dprm_bttry_p_d:__ -0.023 -0.140          0.009          0.006  0.016 -0.172 
## dprm_bttry_p_p:__  0.004 -0.018         -0.066         -0.006 -0.004  0.007 
## mn_ptc_:___       -0.024 -0.012         -0.008         -0.019  0.026  0.001 
## mn_tlt_:___       -0.045  0.007         -0.110         -0.054  0.053  0.017 
## mn_tlt_sc:_       -0.075 -0.063         -0.023         -0.004  0.063 -0.037 
##                   dprm_bttry_p_d:__ dprm_bttry_p_p:__ mn_p_:___ mn_t_:___
## dprm_bttry_p_d                                                           
## dprm_bttry_p_p                                                           
## lextale_scr                                                              
## itln_lxtl_s                                                              
## dprm_bt__:_                                                              
## dprm_bttry_p_d:__                                                        
## dprm_bttry_p_p:__  0.047                                                 
## mn_ptc_:___       -0.019            -0.436                               
## mn_tlt_:___       -0.246            -0.014             0.191             
## mn_tlt_sc:_        0.463             0.003             0.015     0.021   
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.011968 (tol = 0.002, component 1)
print(result_pp_data_1$summary)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: target_looks ~ dprime_battery_up_risetime + mean_pitch_scaled +  
##     duration_scaled + mean_amplitude_scaled + dprime_battery_up_dur:mean_amplitude_scaled +  
##     dprime_battery_up_ptich:mean_pitch_scaled + dprime_battery_up_ptich:duration_scaled +  
##     lextale_score:mean_tilt_scaled + autism_score:mean_tilt_scaled +  
##     italian_lextale_score:mean_amplitude_scaled + (1 | word)
##    Data: data_filtered
## 
##      AIC      BIC   logLik deviance df.resid 
##   3089.8   3160.3  -1532.9   3065.8     2622 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.9948 -0.6322 -0.5346  1.1321  2.3729 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  word   (Intercept) 0.1263   0.3554  
## Number of obs: 2634, groups:  word, 24
## 
## Fixed effects:
##                                             Estimate Std. Error z value
## (Intercept)                                 -1.04585    0.29216  -3.580
## dprime_battery_up_risetime                  -0.39073    0.40509  -0.965
## mean_pitch_scaled                           -0.06378    0.09418  -0.677
## duration_scaled                             -0.11509    0.27094  -0.425
## mean_amplitude_scaled                        0.58582    1.80642   0.324
## mean_amplitude_scaled:dprime_battery_up_dur  0.25248    0.38781   0.651
## mean_pitch_scaled:dprime_battery_up_ptich    0.11475    0.27933   0.411
## duration_scaled:dprime_battery_up_ptich      0.19011    0.30430   0.625
## lextale_score:mean_tilt_scaled               0.18939    0.19524   0.970
## mean_tilt_scaled:autism_score                0.18958    0.30221   0.627
## mean_amplitude_scaled:italian_lextale_score -0.66010    1.84596  -0.358
##                                             Pr(>|z|)    
## (Intercept)                                 0.000344 ***
## dprime_battery_up_risetime                  0.334766    
## mean_pitch_scaled                           0.498288    
## duration_scaled                             0.670999    
## mean_amplitude_scaled                       0.745712    
## mean_amplitude_scaled:dprime_battery_up_dur 0.515024    
## mean_pitch_scaled:dprime_battery_up_ptich   0.681214    
## duration_scaled:dprime_battery_up_ptich     0.532131    
## lextale_score:mean_tilt_scaled              0.332025    
## mean_tilt_scaled:autism_score               0.530449    
## mean_amplitude_scaled:italian_lextale_score 0.720650    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) dpr___ mn_pt_ drtn_s mn_mp_ mn_m_:___ mn_p_:___ d_:___
## dprm_bttr__  0.114                                                       
## mn_ptch_scl -0.093 -0.003                                                
## duratn_scld  0.925  0.052 -0.192                                         
## mn_mpltd_sc -0.014 -0.028  0.004 -0.009                                  
## mn_mpl_:___  0.001 -0.010  0.014  0.002  0.046                           
## mn_ptc_:___ -0.004 -0.009  0.417 -0.037  0.013  0.043                    
## drtn_sc:___  0.040  0.322 -0.090  0.160  0.003  0.001    -0.223          
## lxtl_scr:__  0.221 -0.005  0.085  0.130  0.084 -0.027    -0.049     0.068
## mn_tlt_sc:_  0.056  0.012  0.053  0.021 -0.104  0.041     0.060    -0.073
## mn_mplt_:__  0.001  0.028 -0.001  0.000 -0.999 -0.019    -0.012    -0.003
##             lx_:__ mn__:_
## dprm_bttr__              
## mn_ptch_scl              
## duratn_scld              
## mn_mpltd_sc              
## mn_mpl_:___              
## mn_ptc_:___              
## drtn_sc:___              
## lxtl_scr:__              
## mn_tlt_sc:_ -0.700       
## mn_mplt_:__ -0.096  0.103
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00760328 (tol = 0.002, component 1)
print(result_ap_data_3$summary)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## target_looks ~ dprime_battery_up_dur + lextale_score + dprime_battery_up_dur:mean_pitch_scaled +  
##     dprime_battery_up_dur:mean_tilt_scaled + dprime_battery_up_ptich:mean_pitch_scaled +  
##     dprime_battery_up_ptich:mean_tilt_scaled + dprime_battery_up_risetime:mean_pitch_scaled +  
##     lextale_score:mean_pitch_scaled + (1 | word)
##    Data: data_filtered
## 
##      AIC      BIC   logLik deviance df.resid 
##   3195.0   3253.1  -1587.5   3175.0     2465 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4416 -0.7549 -0.6334  1.1529  1.9937 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  word   (Intercept) 0.03065  0.1751  
## Number of obs: 2475, groups:  word, 23
## 
## Fixed effects:
##                                              Estimate Std. Error z value
## (Intercept)                                   -1.9924     0.3051  -6.531
## dprime_battery_up_dur                         -2.3587     1.4371  -1.641
## lextale_score                                  0.6420     0.4704   1.365
## dprime_battery_up_dur:mean_pitch_scaled       -0.2635     1.6336  -0.161
## dprime_battery_up_dur:mean_tilt_scaled         0.5143     0.5085   1.011
## mean_pitch_scaled:dprime_battery_up_ptich     -0.7025     0.3616  -1.942
## mean_tilt_scaled:dprime_battery_up_ptich       0.4910     0.3569   1.376
## mean_pitch_scaled:dprime_battery_up_risetime  -0.7644     0.4579  -1.669
## lextale_score:mean_pitch_scaled               -1.1386     0.3560  -3.199
##                                              Pr(>|z|)    
## (Intercept)                                  6.52e-11 ***
## dprime_battery_up_dur                         0.10074    
## lextale_score                                 0.17236    
## dprime_battery_up_dur:mean_pitch_scaled       0.87183    
## dprime_battery_up_dur:mean_tilt_scaled        0.31184    
## mean_pitch_scaled:dprime_battery_up_ptich     0.05208 .  
## mean_tilt_scaled:dprime_battery_up_ptich      0.16885    
## mean_pitch_scaled:dprime_battery_up_risetime  0.09509 .  
## lextale_score:mean_pitch_scaled               0.00138 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                             (Intr) dpr___ lxtl_s dprm_bttry_p_dr:mn_p_
## dprm_bttr__                  0.046                                    
## lextale_scr                 -0.756  0.417                             
## dprm_bttry_p_dr:mn_p_        0.003  0.942  0.427                      
## dprm_bttry_p_dr:mn_t_       -0.005  0.327  0.054  0.263               
## mn_ptch_scld:dprm_bttry_p_p  0.105  0.017 -0.065 -0.043               
## mn_tlt_:___                  0.006 -0.102 -0.106 -0.069               
## mn_ptch_scld:dprm_bttry_p_r -0.119 -0.003  0.091 -0.055               
## lxtl_scr:__                  0.009  0.648  0.608  0.678               
##                             dprm_bttry_p_dr:mn_t_ mn_ptch_scld:dprm_bttry_p_p
## dprm_bttr__                                                                  
## lextale_scr                                                                  
## dprm_bttry_p_dr:mn_p_                                                        
## dprm_bttry_p_dr:mn_t_                                                        
## mn_ptch_scld:dprm_bttry_p_p  0.080                                           
## mn_tlt_:___                 -0.517                -0.163                     
## mn_ptch_scld:dprm_bttry_p_r  0.011                -0.325                     
## lxtl_scr:__                  0.069                 0.116                     
##                             mn_t_:___ mn_ptch_scld:dprm_bttry_p_r
## dprm_bttr__                                                      
## lextale_scr                                                      
## dprm_bttry_p_dr:mn_p_                                            
## dprm_bttry_p_dr:mn_t_                                            
## mn_ptch_scld:dprm_bttry_p_p                                      
## mn_tlt_:___                                                      
## mn_ptch_scld:dprm_bttry_p_r  0.000                               
## lxtl_scr:__                 -0.143     0.022                     
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0039877 (tol = 0.002, component 1)
print(result_pp_data_3$summary)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: target_looks ~ dprime_battery_up_dur + lextale_score + autism_score +  
##     italian_lextale_score + dprime_battery_up_dur:mean_pitch_scaled +  
##     dprime_battery_up_formants:duration_scaled + autism_score:mean_pitch_scaled +  
##     (1 | word)
##    Data: data_filtered
## 
##      AIC      BIC   logLik deviance df.resid 
##   3392.5   3445.1  -1687.2   3374.5     2552 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.2893 -0.7930 -0.6725  1.1483  1.7532 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  word   (Intercept) 0.08264  0.2875  
## Number of obs: 2561, groups:  word, 24
## 
## Fixed effects:
##                                            Estimate Std. Error z value Pr(>|z|)
## (Intercept)                                 -7.6840     2.2153  -3.469 0.000523
## dprime_battery_up_dur                       -0.8787     0.6379  -1.377 0.168388
## lextale_score                                0.7306     0.3624   2.016 0.043791
## autism_score                                 0.3281     0.3595   0.913 0.361505
## italian_lextale_score                        6.5459     2.2736   2.879 0.003989
## dprime_battery_up_dur:mean_pitch_scaled     -0.2193     0.5289  -0.415 0.678439
## dprime_battery_up_formants:duration_scaled   0.6316     0.3416   1.849 0.064511
## autism_score:mean_pitch_scaled               0.1938     0.2296   0.844 0.398641
##                                               
## (Intercept)                                ***
## dprime_battery_up_dur                         
## lextale_score                              *  
## autism_score                                  
## italian_lextale_score                      ** 
## dprime_battery_up_dur:mean_pitch_scaled       
## dprime_battery_up_formants:duration_scaled .  
## autism_score:mean_pitch_scaled                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) dpr___ lxtl_s atsm_s itln__ d___:__ dp___:_
## dprm_bttr__  0.063                                            
## lextale_scr  0.062  0.034                                     
## autism_scor -0.243  0.352  0.016                              
## itln_lxtl_s -0.989 -0.064 -0.190  0.193                       
## dprm_b__:__ -0.020 -0.701 -0.006 -0.329  0.023                
## dprm_bt__:_ -0.195 -0.305  0.065 -0.099  0.207  0.125         
## atsm_scr:__  0.003 -0.394 -0.010 -0.513 -0.003  0.589  -0.038 
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0557315 (tol = 0.002, component 1)
print(result_ap_data_1$plot)

print(result_pp_data_1$plot)

print(result_ap_data_3$plot)

print(result_pp_data_3$plot)

# Install and load ggtext if not already installed
if (!requireNamespace("ggtext", quietly = TRUE)) {
  install.packages("ggtext")
}
library(ggtext)

# Define variables
acoustic_vars <- c("mean_pitch_scaled", "mean_amplitude_scaled", "mean_tilt_scaled", "duration_scaled")
id_vars <- c("dprime_battery_up_dur", "dprime_battery_up_ptich", "dprime_battery_up_risetime", "dprime_battery_up_formants", "lextale_score", "autism_score", "italian_lextale_score", "lextale_score")

# Convert the summaries into data frames
result_ap_data_1_coefs <- as.data.frame(result_ap_data_1$summary$coef)
result_ap_data_1_coefs$model <- "ap_data_1"
result_ap_data_3_coefs <- as.data.frame(result_ap_data_3$summary$coef)
result_ap_data_3_coefs$model <- "ap_data_3"
result_pp_data_1_coefs <- as.data.frame(result_pp_data_1$summary$coef)
result_pp_data_1_coefs$model <- "pp_data_1"
result_pp_data_3_coefs <- as.data.frame(result_pp_data_3$summary$coef)
result_pp_data_3_coefs$model <- "pp_data_3"
all_coefs <- rbind(result_ap_data_1_coefs, result_ap_data_3_coefs, result_pp_data_1_coefs, result_pp_data_3_coefs)

# Prepare data for plotting
all_coefs <- all_coefs %>%
  rownames_to_column("term") %>%
  mutate(conf.low = Estimate - 1.96 * `Std. Error`,
         conf.high = Estimate + 1.96 * `Std. Error`,
         significance = case_when(
           `Pr(>|z|)` < 0.001 ~ "***",
           `Pr(>|z|)` < 0.01 ~ "**",
           `Pr(>|z|)` < 0.05 ~ "*",
           TRUE ~ ""
         ),
         model_type = ifelse(str_detect(model, "1"), "Model 1", "Model 3"),
         model_color = ifelse(str_detect(model, "pp"), "PP", "AP"),
         term_label = case_when(
           term %in% acoustic_vars & term %in% id_vars ~ "both",
           term %in% acoustic_vars ~ "acoustic",
           term %in% id_vars ~ "id",
           TRUE ~ "other"
         ),
         interaction_label = case_when(
           str_detect(term, "Intercept") ~ 1,
           str_detect(term, ":") ~ 3,
           TRUE ~ 2
         ))

# Update term names for better readability and bold acoustic terms
all_coefs <- all_coefs %>%
  mutate(old_term = term) %>%
  mutate(term = str_replace_all(term, "1", "")) %>%
  mutate(term = str_replace_all(term, "2", "")) %>%
  mutate(term = str_replace_all(term, "3", "")) %>%
  mutate(term = str_replace_all(term, "_", " ")) %>%
  mutate(term = str_replace_all(term, "dprime battery up", "(battery d')")) %>%
  mutate(term = str_replace_all(term, "mean", "")) %>%
  mutate(term = str_replace_all(term, "italian lextale score", "lextale (Italian)")) %>%
  mutate(term = str_replace_all(term, "lextale score", "lextale (English)")) %>%
  mutate(term = str_replace_all(term, "scaled", "")) %>%
  mutate(term = str_replace_all(term, "dprime", "")) %>%
  mutate(term = str_replace_all(term, "Autism score", "score")) %>%
  mutate(term = str_replace_all(term, "tilt", "spec tilt")) %>%
  mutate(term = str_replace_all(term, "(battery d') dur", "(battery d') duration")) %>%
  mutate(term = ifelse(old_term %in% acoustic_vars, paste0("**", term, "**"), term))

# Define the order of terms
ordered_terms <- all_coefs %>%
  arrange(interaction_label, term_label, model) %>%
  pull(term) %>%
  unique()
ordered_terms
##  [1] "(Intercept)"                       "** pitch **"                      
##  [3] "**duration **"                     "** amplitude **"                  
##  [5] "(battery d') dur"                  "(battery d') ptich"               
##  [7] "lextale (English)"                 "lextale (Italian)"                
##  [9] "(battery d') risetime"             "autism score"                     
## [11] "(battery d') dur:duration "        "(battery d') dur: spec tilt "     
## [13] "(battery d') ptich: pitch "        " pitch :(battery d') risetime"    
## [15] " spec tilt :(battery d') risetime" " spec tilt :autism score"         
## [17] "(battery d') dur: pitch "          " pitch :(battery d') ptich"       
## [19] " spec tilt :(battery d') ptich"    "lextale (English): pitch "        
## [21] " amplitude :(battery d') dur"      "duration :(battery d') ptich"     
## [23] "lextale (English): spec tilt "     " amplitude :lextale (Italian)"    
## [25] "(battery d') formants:duration "   "autism score: pitch "
# Set factor levels for term
all_coefs$term <- factor(all_coefs$term, levels = ordered_terms)

# Function to create coefficient plot with confidence intervals and significance markers
create_coef_plot <- function(coef_df, model_label, remove_y_labels = FALSE) {
  plot <- ggplot(coef_df, aes(x = Estimate, y = term, color = model_color)) +
    geom_point(position = position_dodge(width = 0.5)) +
    geom_errorbarh(aes(xmin = conf.low, xmax = conf.high), height = 0.2, position = position_dodge(width = 0.5)) +
    geom_text(aes(label = significance, x = -11, color = model_color), vjust = 0.5, position = position_dodge(width = 0.5)) +
    labs(title = paste("Coefficient Estimates with Confidence Intervals -", model_label),
         x = "Estimate",
         y = if (remove_y_labels) NULL else "Predictor",
         color = "Model Type") +
    theme_minimal() +
    theme(legend.position = "top",
          axis.text.y = if (remove_y_labels) element_blank() else element_markdown(),
          axis.ticks.y = if (remove_y_labels) element_blank() else element_line()) +
    facet_wrap(~ model_type, scales = "free")
  return(plot)
}

# Create and display the plot
plot <- create_coef_plot(all_coefs, "")
plot